Wed Sep 10 02:46:23 PDT 2014

MAINTAINERS                       |    2 
 block.c                           |    2 
 block/archipelago.c               |   76 +---
 block/vmdk.c                      |    4 
 blockdev.c                        |   30 -
 default-configs/ppc-softmmu.mak   |    4 
 default-configs/ppc64-softmmu.mak |    3 
 hw/block/pflash_cfi01.c           |   18 -
 hw/core/loader.c                  |   21 +
 hw/ide/core.c                     |  111 +++++-
 hw/misc/macio/cuda.c              |   23 +
 hw/misc/macio/macio.c             |   19 +
 hw/net/spapr_llan.c               |   13 
 hw/nvram/mac_nvram.c              |   70 ++--
 hw/pci-host/apb.c                 |   15 
 hw/ppc/mac.h                      |    4 
 hw/ppc/mac_newworld.c             |   32 +
 hw/ppc/mac_oldworld.c             |   20 +
 hw/ppc/spapr.c                    |  188 +++++++----
 hw/ppc/spapr_hcall.c              |   22 -
 hw/ppc/spapr_pci.c                |   61 +--
 hw/ppc/spapr_rtas.c               |   15 
 include/hw/loader.h               |    1 
 include/hw/pci-host/spapr.h       |    2 
 include/hw/ppc/ppc.h              |    2 
 include/hw/ppc/spapr.h            |    8 
 include/qapi/util.h               |   17 +
 include/sysemu/kvm.h              |    2 
 kvm-all.c                         |   13 
 linux-user/signal.c               |  184 +++++++---
 qapi/Makefile.objs                |    2 
 qapi/qapi-util.c                  |   34 ++
 qemu-img.c                        |    3 
 qemu-img.texi                     |    7 
 qemu-nbd.c                        |  100 +++--
 target-ppc/helper.h               |    1 
 target-ppc/int_helper.c           |   19 -
 target-ppc/kvm.c                  |  371 ++++++++++++++++++++--
 target-ppc/kvm_ppc.h              |   12 
 target-ppc/translate.c            |  123 ++++---
 tests/Makefile                    |    3 
 tests/ide-test.c                  |    2 
 tests/libqos/malloc-pc.c          |  280 ++++++++++++++++
 tests/libqos/malloc-pc.h          |    9 
 tests/libqos/pci.c                |  111 ++++++
 tests/libqos/pci.h                |   10 
 tests/libqos/virtio-pci.c         |  343 ++++++++++++++++++++
 tests/libqos/virtio-pci.h         |   61 +++
 tests/libqos/virtio.c             |  257 +++++++++++++++
 tests/libqos/virtio.h             |  182 ++++++++++
 tests/libqtest.c                  |   48 ++
 tests/libqtest.h                  |    7 
 tests/virtio-blk-test.c           |  640 +++++++++++++++++++++++++++++++++++++-
 53 files changed, 3136 insertions(+), 471 deletions(-)

New commits:
commit 10601bef566461efbea81ee344ed3355ce71954a
Merge: 1bc0e40 de739df
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Tue Sep 9 15:08:05 2014 +0100

    Merge remote-tracking branch 'remotes/mcayland/tags/qemu-sparc-signed' into staging
    
    apb: implement PCI bus error interrupt map registers
    
    # gpg: Signature made Tue 09 Sep 2014 06:09:27 BST using RSA key ID AE0F321F
    # gpg: Can't check signature: public key not found
    
    * remotes/mcayland/tags/qemu-sparc-signed:
      apb: implement PCI bus error interrupt map registers
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

commit de739df8e0032d441a57ec1a8f9ab8e9bf72cef0
Author: Mark Cave-Ayland <mark.cave-ayland at ilande.co.uk>
Date:   Fri Sep 5 14:50:56 2014 +0100

    apb: implement PCI bus error interrupt map registers
    
    Both OpenBSD and FreeBSD SPARC64 attempt to read the interrupt map from the
    hardware and will fail if the correct ino isn't present.
    
    Signed-off-by: Mark Cave-Ayland <mark.cave-ayland at ilande.co.uk>

diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c
index 762ebdd..f573875 100644
--- a/hw/pci-host/apb.c
+++ b/hw/pci-host/apb.c
@@ -142,6 +142,7 @@ typedef struct APBState {
     IOMMUState iommu;
     uint32_t pci_control[16];
     uint32_t pci_irq_map[8];
+    uint32_t pci_err_irq_map[4];
     uint32_t obio_irq_map[32];
     qemu_irq *pbm_irqs;
     qemu_irq *ivec_irqs;
@@ -437,7 +438,7 @@ static void apb_config_writel (void *opaque, hwaddr addr,
             pbm_check_irqs(s);
         }
         break;
-    case 0x1000 ... 0x1080: /* OBIO interrupt control */
+    case 0x1000 ... 0x107f: /* OBIO interrupt control */
         if (addr & 4) {
             unsigned int ino = ((addr & 0xff) >> 3);
             s->obio_irq_map[ino] &= PBM_PCI_IMR_MASK;
@@ -515,13 +516,20 @@ static uint64_t apb_config_readl (void *opaque,
             val = 0;
         }
         break;
-    case 0x1000 ... 0x1080: /* OBIO interrupt control */
+    case 0x1000 ... 0x107f: /* OBIO interrupt control */
         if (addr & 4) {
             val = s->obio_irq_map[(addr & 0xff) >> 3];
         } else {
             val = 0;
         }
         break;
+    case 0x1080 ... 0x108f: /* PCI bus error */
+        if (addr & 4) {
+            val = s->pci_err_irq_map[(addr & 0xf) >> 3];
+        } else {
+            val = 0;
+        }
+        break;
     case 0x2000 ... 0x202f: /* PCI control */
         val = s->pci_control[(addr & 0x3f) >> 2];
         break;
@@ -753,6 +761,9 @@ static int pci_pbm_init_device(SysBusDevice *dev)
     for (i = 0; i < 8; i++) {
         s->pci_irq_map[i] = (0x1f << 6) | (i << 2);
     }
+    for (i = 0; i < 2; i++) {
+        s->pci_err_irq_map[i] = (0x1f << 6) | 0x30;
+    }
     for (i = 0; i < 32; i++) {
         s->obio_irq_map[i] = ((0x1f << 6) | 0x20) + i;
     }
commit 1bc0e405816c9c6bde5695af20b07a1491ce1f9f
Merge: 2d6838e 01ce352
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Mon Sep 8 13:14:41 2014 +0100

    Merge remote-tracking branch 'remotes/stefanha/tags/block-pull-request' into staging
    
    Block pull request
    
    # gpg: Signature made Mon 08 Sep 2014 11:49:31 BST using RSA key ID 81AB73C8
    # gpg: Good signature from "Stefan Hajnoczi <stefanha at redhat.com>"
    # gpg:                 aka "Stefan Hajnoczi <stefanha at gmail.com>"
    
    * remotes/stefanha/tags/block-pull-request: (24 commits)
      ide: Add resize callback to ide/core
      IDE: Fill the IDENTIFY request consistently
      vmdk: fix buf leak in vmdk_parse_extents()
      vmdk: fix vmdk_parse_extents() extent_file leaks
      ide: Add wwn support to IDE-ATAPI drive
      qtest/ide: Uninitialize PC allocator
      libqos: add a simple first-fit memory allocator
      MAINTAINERS: update sheepdog maintainer
      qemu-nbd: fix indentation and coding style
      qemu-nbd: add option to set detect-zeroes mode
      rename parse_enum_option to qapi_enum_parse and make it public
      block/archipelago: Use QEMU atomic builtins
      qemu-img: fix rebase src_cache option documentation
      qemu-img: clarify src_cache option documentation
      libqos: Added EVENT_IDX support
      libqos: Added MSI-X support
      libqos: Added test case for configuration changes in virtio-blk test
      libqos: Added indirect descriptor support to virtio implementation
      libqos: Added basic virtqueue support to virtio implementation
      tests: Add virtio device initialization
      ...
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

commit 2d6838e86ce942f886401818b48d77e575a5f7de
Merge: f102f22 85423d9
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Mon Sep 8 12:02:07 2014 +0100

    Merge remote-tracking branch 'remotes/agraf/tags/signed-ppc-for-upstream' into staging
    
    Patch queue for ppc - 2014-09-08
    
    Alexander Graf (11):
          PPC: KVM: Fix g3beige and mac99 when HV is loaded
          PPC: mac99: Move NVRAM to page boundary when necessary
          KVM: Add helper to run KVM_CHECK_EXTENSION on vm fd
          PPC: KVM: Use vm check_extension for pv hcall
          PPC: mac99: Fix core99 timer frequency
          PPC: mac_nvram: Remove unused functions
          PPC: mac_nvram: Allow 2 and 4 byte accesses
          PPC: mac_nvram: Split NVRAM into OF and OSX parts
          PPC: Mac: Move tbfreq into local variable
          PPC: Cuda: Use cuda timer to expose tbfreq to guest
          PPC: Fix default config ordering and add eTSEC for ppc64
    
    Alexey Kardashevskiy (7):
          spapr: Move DT memory node rendering to a helper
          spapr: Use DT memory node rendering helper for other nodes
          spapr: Refactor spapr_populate_memory() to allow memoryless nodes
          spapr: Split memory nodes to power-of-two blocks
          spapr: Add a helper for node0_size calculation
          spapr: Fix ibm, associativity for memory nodes
          spapr_pci: Fix config space corruption
    
    Anton Blanchard (2):
          spapr-vlan: Don't touch last entry in buffer list
          hypervisor property clashes with hypervisor node
    
    Benjamin Herrenschmidt (2):
          loader: Add load_image_size() to replace load_image()
          spapr: Locate RTAS and device-tree based on real RMA
    
    Bharat Bhushan (4):
          ppc: debug stub: Get trap instruction opcode from KVM
          ppc: synchronize excp_vectors for injecting exception
          ppc: Add software breakpoint support
          ppc: Add hw breakpoint watchpoint support
    
    Gonglei (1):
          spapr: fix possible memory leak
    
    Greg Kurz (1):
          spapr_pci: map the MSI window in each PHB
    
    Nikunj A Dadhania (3):
          ppc: spapr-rtas - implement os-term rtas call
          spapr: add uuid/host details to device tree
          ppc/spapr: Fix MAX_CPUS to 255
    
    Peter Maydell (1):
          hw/ppc/spapr_hcall.c: Fix typo in function names
    
    Tom Musta (20):
          linux-user: Fix Stack Pointer Bug in PPC setup_rt_frame
          linux-user: Split PPC Trampoline Encoding from Register Save
          linux-user: Enable Signal Handlers on PPC64
          linux-user: Properly Dereference PPC64 ELFv1 Signal Handler Pointer
          linux-user: Implement do_setcontext for PPC64
          linux-user: Handle PPC64 ELFv2 Function Pointers
          target-ppc: Bug Fix: rlwinm
          target-ppc: Bug Fix: rlwnm
          target-ppc: Bug Fix: rlwimi
          target-ppc: Bug Fix: mullwo
          target-ppc: Bug Fix: mullw
          target-ppc: Bug Fix: mulldo OV Detection
          target-ppc: Bug Fix: srawi
          target-ppc: Bug Fix: srad
          target-ppc: Special Case of rlwimi Should Use Deposit
          target-ppc: Optimize rlwinm MB=0 ME=31
          target-ppc: Optimize rlwnm MB=0 ME=31
          target-ppc: Clean Up mullw
          target-ppc: Clean up mullwo
          target-ppc: Implement mulldo with TCG
    
    # gpg: Signature made Mon 08 Sep 2014 11:51:15 BST using RSA key ID 03FEDC60
    # gpg: Can't check signature: public key not found
    
    * remotes/agraf/tags/signed-ppc-for-upstream: (52 commits)
      hypervisor property clashes with hypervisor node
      PPC: Fix default config ordering and add eTSEC for ppc64
      spapr_pci: map the MSI window in each PHB
      target-ppc: Implement mulldo with TCG
      target-ppc: Clean up mullwo
      target-ppc: Clean Up mullw
      target-ppc: Optimize rlwnm MB=0 ME=31
      target-ppc: Optimize rlwinm MB=0 ME=31
      target-ppc: Special Case of rlwimi Should Use Deposit
      spapr-vlan: Don't touch last entry in buffer list
      spapr_pci: Fix config space corruption
      PPC: Cuda: Use cuda timer to expose tbfreq to guest
      PPC: Mac: Move tbfreq into local variable
      PPC: mac_nvram: Split NVRAM into OF and OSX parts
      PPC: mac_nvram: Allow 2 and 4 byte accesses
      PPC: mac_nvram: Remove unused functions
      PPC: mac99: Fix core99 timer frequency
      PPC: KVM: Use vm check_extension for pv hcall
      KVM: Add helper to run KVM_CHECK_EXTENSION on vm fd
      target-ppc: Bug Fix: srad
      ...
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

commit 85423d90c7bdbbae3d97ed3a12b5db79d00a3fb0
Author: Anton Blanchard <anton at samba.org>
Date:   Sat Aug 30 15:55:21 2014 +1000

    hypervisor property clashes with hypervisor node
    
    dtc fails on a recent QEMU snapshot:
    
    ERROR (name_properties): "name" property in /hypervisor#1 is incorrect ("hypervisor" instead of base node name)
    
    Looking at the device tree we have a hypervisor property:
    
    # lsprop hypervisor
    hypervisor       "kvm"
    
    But we also have a hypervisor node, with a name that doesn't match:
    
    # lsprop hypervisor#1/
    name             "hypervisor"
    compatible       "linux,kvm"
    linux,phandle    7e5eb5d8 (2120136152)
    
    Commit c08ce91d309c (spapr: add uuid/host details to device tree)
    looks to have collided with an earlier patch. Remove the hypervisor
    property.
    
    Signed-off-by: Anton Blanchard <anton at samba.org>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 65b28ac..2ab4460 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -363,10 +363,6 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
     _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));
     _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries")));
 
-    if (kvm_enabled()) {
-        _FDT((fdt_property_string(fdt, "hypervisor", "kvm")));
-    }
-
     /*
      * Add info to guest to indentify which host is it being run on
      * and what is the uuid of the guest
commit 4a761ffa37d20a5d3ecbb95b45b4c42e54c8f18e
Author: Alexander Graf <agraf at suse.de>
Date:   Wed Jul 2 19:01:46 2014 +0200

    PPC: Fix default config ordering and add eTSEC for ppc64
    
    We messed up the ordering in our default configs for PPC. The top entries
    are generic entries, then come sections that indicate that features are only
    in because of a special feature (such as PReP).
    
    Fix the ordering again and while at it add eTSEC support to the ppc64 target
    so that we can spawn eTSEC adapters with qemu-system-ppc64.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/default-configs/ppc-softmmu.mak b/default-configs/ppc-softmmu.mak
index 33f8d84..d725b23 100644
--- a/default-configs/ppc-softmmu.mak
+++ b/default-configs/ppc-softmmu.mak
@@ -45,8 +45,8 @@ CONFIG_PREP=y
 CONFIG_MAC=y
 CONFIG_E500=y
 CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
+CONFIG_ETSEC=y
+CONFIG_LIBDECNUMBER=y
 # For PReP
 CONFIG_MC146818RTC=y
-CONFIG_ETSEC=y
 CONFIG_ISA_TESTDEV=y
-CONFIG_LIBDECNUMBER=y
diff --git a/default-configs/ppc64-softmmu.mak b/default-configs/ppc64-softmmu.mak
index 37a15b7..bd30d69 100644
--- a/default-configs/ppc64-softmmu.mak
+++ b/default-configs/ppc64-softmmu.mak
@@ -46,6 +46,8 @@ CONFIG_PREP=y
 CONFIG_MAC=y
 CONFIG_E500=y
 CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
+CONFIG_ETSEC=y
+CONFIG_LIBDECNUMBER=y
 # For pSeries
 CONFIG_XICS=$(CONFIG_PSERIES)
 CONFIG_XICS_KVM=$(and $(CONFIG_PSERIES),$(CONFIG_KVM))
@@ -58,4 +60,3 @@ CONFIG_I82374=y
 CONFIG_I8257=y
 CONFIG_MC146818RTC=y
 CONFIG_ISA_TESTDEV=y
-CONFIG_LIBDECNUMBER=y
commit 8c46f7ec85a4dd9663489b2fa2b425cd7b3653e1
Author: Greg Kurz <gkurz at linux.vnet.ibm.com>
Date:   Wed Aug 27 18:17:12 2014 +0200

    spapr_pci: map the MSI window in each PHB
    
    On sPAPR, virtio devices are connected to the PCI bus and use MSI-X.
    Commit cc943c36faa192cd4b32af8fe5edb31894017d35 has modified MSI-X
    so that writes are made using the bus master address space and follow
    the IOMMU path.
    
    Unfortunately, the IOMMU address space address space does not have an
    MSI window: the notification is silently dropped in unassigned_mem_write
    instead of reaching the guest... The most visible effect is that all
    virtio devices are non-functional on sPAPR since then. :(
    
    This patch does the following:
    1) map the MSI window into the IOMMU address space for each PHB
       - since each PHB instantiates its own IOMMU address space, we
         can safely map the window at a fixed address (SPAPR_PCI_MSI_WINDOW)
       - no real need to keep the MSI window setup in a separate function,
         the spapr_pci_msi_init() code moves to spapr_phb_realize().
    
    2) kill the global MSI window as it is not needed in the end
    
    Signed-off-by: Greg Kurz <gkurz at linux.vnet.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 555a007..65b28ac 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1441,7 +1441,6 @@ static void ppc_spapr_init(MachineState *machine)
     spapr_create_nvram(spapr);
 
     /* Set up PCI */
-    spapr_pci_msi_init(spapr, SPAPR_PCI_MSI_WINDOW);
     spapr_pci_rtas_init();
 
     phb = spapr_create_phb(spapr, 0);
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index e894f07..ad0da7f 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -345,7 +345,7 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     }
 
     /* Setup MSI/MSIX vectors in the device (via cfgspace or MSIX BAR) */
-    spapr_msi_setmsg(pdev, spapr->msi_win_addr, ret_intr_type == RTAS_TYPE_MSIX,
+    spapr_msi_setmsg(pdev, SPAPR_PCI_MSI_WINDOW, ret_intr_type == RTAS_TYPE_MSIX,
                      irq, req_num);
 
     /* Add MSI device to cache */
@@ -469,34 +469,6 @@ static const MemoryRegionOps spapr_msi_ops = {
     .endianness = DEVICE_LITTLE_ENDIAN
 };
 
-void spapr_pci_msi_init(sPAPREnvironment *spapr, hwaddr addr)
-{
-    uint64_t window_size = 4096;
-
-    /*
-     * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
-     * we need to allocate some memory to catch those writes coming
-     * from msi_notify()/msix_notify().
-     * As MSIMessage:addr is going to be the same and MSIMessage:data
-     * is going to be a VIRQ number, 4 bytes of the MSI MR will only
-     * be used.
-     *
-     * For KVM we want to ensure that this memory is a full page so that
-     * our memory slot is of page size granularity.
-     */
-#ifdef CONFIG_KVM
-    if (kvm_enabled()) {
-        window_size = getpagesize();
-    }
-#endif
-
-    spapr->msi_win_addr = addr;
-    memory_region_init_io(&spapr->msiwindow, NULL, &spapr_msi_ops, spapr,
-                          "msi", window_size);
-    memory_region_add_subregion(get_system_memory(), spapr->msi_win_addr,
-                                &spapr->msiwindow);
-}
-
 /*
  * PHB PCI device
  */
@@ -516,6 +488,7 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
     char *namebuf;
     int i;
     PCIBus *bus;
+    uint64_t msi_window_size = 4096;
 
     if (sphb->index != -1) {
         hwaddr windows_base;
@@ -608,6 +581,28 @@ static void spapr_phb_realize(DeviceState *dev, Error **errp)
     address_space_init(&sphb->iommu_as, &sphb->iommu_root,
                        sphb->dtbusname);
 
+    /*
+     * As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
+     * we need to allocate some memory to catch those writes coming
+     * from msi_notify()/msix_notify().
+     * As MSIMessage:addr is going to be the same and MSIMessage:data
+     * is going to be a VIRQ number, 4 bytes of the MSI MR will only
+     * be used.
+     *
+     * For KVM we want to ensure that this memory is a full page so that
+     * our memory slot is of page size granularity.
+     */
+#ifdef CONFIG_KVM
+    if (kvm_enabled()) {
+        msi_window_size = getpagesize();
+    }
+#endif
+
+    memory_region_init_io(&sphb->msiwindow, NULL, &spapr_msi_ops, spapr,
+                          "msi", msi_window_size);
+    memory_region_add_subregion(&sphb->iommu_root, SPAPR_PCI_MSI_WINDOW,
+                                &sphb->msiwindow);
+
     pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
 
     pci_bus_set_route_irq_fn(bus, spapr_route_intx_pin_to_irq);
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 32f0aa7..4ea2a0d 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -70,7 +70,7 @@ struct sPAPRPHBState {
 
     MemoryRegion memspace, iospace;
     hwaddr mem_win_addr, mem_win_size, io_win_addr, io_win_size;
-    MemoryRegion memwindow, iowindow;
+    MemoryRegion memwindow, iowindow, msiwindow;
 
     uint32_t dma_liobn;
     AddressSpace iommu_as;
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 36e8e51..749daf4 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -13,8 +13,6 @@ struct sPAPRNVRAM;
 typedef struct sPAPREnvironment {
     struct VIOsPAPRBus *vio_bus;
     QLIST_HEAD(, sPAPRPHBState) phbs;
-    hwaddr msi_win_addr;
-    MemoryRegion msiwindow;
     struct sPAPRNVRAM *nvram;
     XICSState *icp;
 
commit 22ffad31d453a82aa290bf196f904580198e8e66
Author: Tom Musta <tommusta at gmail.com>
Date:   Mon Aug 25 14:25:44 2014 -0500

    target-ppc: Implement mulldo with TCG
    
    Optimize mulldo by using the muls2_i64 operation rather than a helper.  Eliminate
    the obsolete helper code.
    
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    Suggested-by: Richard Henderson <rth at twiddle.net>
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 509eae5..0cfdc8a 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -28,7 +28,6 @@ DEF_HELPER_2(icbi, void, env, tl)
 DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32)
 
 #if defined(TARGET_PPC64)
-DEF_HELPER_3(mulldo, i64, env, i64, i64)
 DEF_HELPER_4(divdeu, i64, env, i64, i64, i32)
 DEF_HELPER_4(divde, i64, env, i64, i64, i32)
 #endif
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index e5b103b..713d777 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -24,33 +24,6 @@
 #include "helper_regs.h"
 /*****************************************************************************/
 /* Fixed point operations helpers */
-#if defined(TARGET_PPC64)
-
-uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
-{
-    int64_t th;
-    uint64_t tl;
-
-    muls64(&tl, (uint64_t *)&th, arg1, arg2);
-
-    /* th should either contain all 1 bits or all 0 bits and should
-     * match the sign bit of tl; otherwise we have overflowed. */
-
-    if ((int64_t)tl < 0) {
-        if (likely(th == -1LL)) {
-            env->ov = 0;
-        } else {
-            env->so = env->ov = 1;
-        }
-    } else if (likely(th == 0LL)) {
-        env->ov = 0;
-    } else {
-        env->so = env->ov = 1;
-    }
-
-    return (int64_t)tl;
-}
-#endif
 
 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                            uint32_t oe)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 1062634..d03daea 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1215,8 +1215,20 @@ static void gen_mulld(DisasContext *ctx)
 /* mulldo  mulldo. */
 static void gen_mulldo(DisasContext *ctx)
 {
-    gen_helper_mulldo(cpu_gpr[rD(ctx->opcode)], cpu_env,
-                      cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+
+    tcg_gen_muls2_i64(t0, t1, cpu_gpr[rA(ctx->opcode)],
+                      cpu_gpr[rB(ctx->opcode)]);
+    tcg_gen_mov_i64(cpu_gpr[rD(ctx->opcode)], t0);
+
+    tcg_gen_sari_i64(t0, t0, 63);
+    tcg_gen_setcond_i64(TCG_COND_NE, cpu_ov, t0, t1);
+    tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
+
+    tcg_temp_free_i64(t0);
+    tcg_temp_free_i64(t1);
+
     if (unlikely(Rc(ctx->opcode) != 0)) {
         gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
     }
commit 269778769d4d24c511bc3d5f95eeb2e92dcf1868
Author: Tom Musta <tommusta at gmail.com>
Date:   Mon Aug 25 14:25:43 2014 -0500

    target-ppc: Clean up mullwo
    
    Simplify the implementation of mullwo.  For 64 bit CPUs, the result is
    the concatenation of the upper and lower parts of the muls2_i32 operation,
    which may be slightly better than deposit.  For 32 bit CPUs, the lower part
    of the muls_i32 operation is moved into the target GPR.
    
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    Suggested-by: Richard Henderson <rth at twiddle.net>
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index ced295f..1062634 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1150,19 +1150,14 @@ static void gen_mullwo(DisasContext *ctx)
 {
     TCGv_i32 t0 = tcg_temp_new_i32();
     TCGv_i32 t1 = tcg_temp_new_i32();
-#if defined(TARGET_PPC64)
-    TCGv_i64 t2 = tcg_temp_new_i64();
-#endif
 
     tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
     tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
     tcg_gen_muls2_i32(t0, t1, t0, t1);
-    tcg_gen_ext_i32_tl(cpu_gpr[rD(ctx->opcode)], t0);
 #if defined(TARGET_PPC64)
-    tcg_gen_ext_i32_tl(t2, t1);
-    tcg_gen_deposit_i64(cpu_gpr[rD(ctx->opcode)],
-                        cpu_gpr[rD(ctx->opcode)], t2, 32, 32);
-    tcg_temp_free(t2);
+    tcg_gen_concat_i32_i64(cpu_gpr[rD(ctx->opcode)], t0, t1);
+#else
+    tcg_gen_mov_i32(cpu_gpr[rD(ctx->opcode)], t0);
 #endif
 
     tcg_gen_sari_i32(t0, t0, 31);
commit 03039e5ef0b92cb3ec89ff2caa5b57fa6bf12a88
Author: Tom Musta <tommusta at gmail.com>
Date:   Mon Aug 25 14:25:42 2014 -0500

    target-ppc: Clean Up mullw
    
    Eliminate the unecessary ext32s TCG operation and make the multiplication
    operation explicitly 32 bit.
    
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    Suggested-by: Richard Henderson <rth at twiddle.net>
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 57cb381..ced295f 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1138,9 +1138,8 @@ static void gen_mullw(DisasContext *ctx)
     tcg_temp_free(t0);
     tcg_temp_free(t1);
 #else
-    tcg_gen_mul_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
-                   cpu_gpr[rB(ctx->opcode)]);
-    tcg_gen_ext32s_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rD(ctx->opcode)]);
+    tcg_gen_mul_i32(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
+                    cpu_gpr[rB(ctx->opcode)]);
 #endif
     if (unlikely(Rc(ctx->opcode) != 0))
         gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
commit 57fca134bb64926f00ab8b14cdb8d345f395e07f
Author: Tom Musta <tommusta at gmail.com>
Date:   Mon Aug 25 14:25:41 2014 -0500

    target-ppc: Optimize rlwnm MB=0 ME=31
    
    Optimize the special case of rlwnm where MB=0 and ME=31.  This can
    be implemented using a ROTL.
    
    Suggested-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 889e37d..57cb381 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1721,37 +1721,49 @@ static void gen_rlwinm(DisasContext *ctx)
 static void gen_rlwnm(DisasContext *ctx)
 {
     uint32_t mb, me;
-    TCGv t0;
-#if defined(TARGET_PPC64)
-    TCGv t1;
-#endif
-
     mb = MB(ctx->opcode);
     me = ME(ctx->opcode);
-    t0 = tcg_temp_new();
-    tcg_gen_andi_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x1f);
+
+    if (likely(mb == 0 && me == 31)) {
+        TCGv_i32 t0, t1;
+        t0 = tcg_temp_new_i32();
+        t1 = tcg_temp_new_i32();
+        tcg_gen_trunc_tl_i32(t0, cpu_gpr[rB(ctx->opcode)]);
+        tcg_gen_trunc_tl_i32(t1, cpu_gpr[rS(ctx->opcode)]);
+        tcg_gen_andi_i32(t0, t0, 0x1f);
+        tcg_gen_rotl_i32(t1, t1, t0);
+        tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t1);
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
+    } else {
+        TCGv t0;
 #if defined(TARGET_PPC64)
-    t1 = tcg_temp_new_i64();
-    tcg_gen_deposit_i64(t1, cpu_gpr[rS(ctx->opcode)],
-        cpu_gpr[rS(ctx->opcode)], 32, 32);
-    tcg_gen_rotl_i64(t0, t1, t0);
-    tcg_temp_free_i64(t1);
-#else
-    tcg_gen_rotl_i32(t0, cpu_gpr[rS(ctx->opcode)], t0);
+        TCGv t1;
 #endif
-    if (unlikely(mb != 0 || me != 31)) {
+
+        t0 = tcg_temp_new();
+        tcg_gen_andi_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x1f);
 #if defined(TARGET_PPC64)
-        mb += 32;
-        me += 32;
+        t1 = tcg_temp_new_i64();
+        tcg_gen_deposit_i64(t1, cpu_gpr[rS(ctx->opcode)],
+                            cpu_gpr[rS(ctx->opcode)], 32, 32);
+        tcg_gen_rotl_i64(t0, t1, t0);
+        tcg_temp_free_i64(t1);
+#else
+        tcg_gen_rotl_i32(t0, cpu_gpr[rS(ctx->opcode)], t0);
 #endif
-        tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], t0, MASK(mb, me));
-    } else {
+        if (unlikely(mb != 0 || me != 31)) {
 #if defined(TARGET_PPC64)
-        tcg_gen_andi_tl(t0, t0, MASK(32, 63));
+            mb += 32;
+            me += 32;
 #endif
-        tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], t0);
+            tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], t0, MASK(mb, me));
+        } else {
+            tcg_gen_andi_tl(t0, t0, MASK(32, 63));
+            tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], t0);
+        }
+        tcg_temp_free(t0);
     }
-    tcg_temp_free(t0);
     if (unlikely(Rc(ctx->opcode) != 0))
         gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]);
 }
commit 8979c2f602357129fdf07a5cf8484ca430928b47
Author: Tom Musta <tommusta at gmail.com>
Date:   Mon Aug 25 14:25:40 2014 -0500

    target-ppc: Optimize rlwinm MB=0 ME=31
    
    Optimize the special case of rlwinm where MB=0 and ME=31.  This can
    be implemented as a 32-bit ROTL.
    
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    Suggested-by: Richard Henderson <rth at twiddle.net>
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 095b83c..889e37d 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1691,6 +1691,12 @@ static void gen_rlwinm(DisasContext *ctx)
         tcg_gen_shri_tl(t0, t0, mb);
         tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], t0);
         tcg_temp_free(t0);
+    } else if (likely(mb == 0 && me == 31)) {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        tcg_gen_trunc_tl_i32(t0, cpu_gpr[rS(ctx->opcode)]);
+        tcg_gen_rotli_i32(t0, t0, sh);
+        tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t0);
+        tcg_temp_free_i32(t0);
     } else {
         TCGv t0 = tcg_temp_new();
 #if defined(TARGET_PPC64)
commit ab92678d0a24e7ef8d4d93d18e5c0df8619874fe
Author: Tom Musta <tommusta at gmail.com>
Date:   Mon Aug 25 14:25:39 2014 -0500

    target-ppc: Special Case of rlwimi Should Use Deposit
    
    The special case of rlwimi where MB <= ME and SH = 31-ME can be implemented
    with a single TCG deposit operation.  This replaces the less general case
    of SH = MB = 0 and ME = 31.
    
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    Suggested-by: Richard Henderson <rth at twiddle.net>
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 47dc903..095b83c 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1636,12 +1636,9 @@ static void gen_rlwimi(DisasContext *ctx)
     mb = MB(ctx->opcode);
     me = ME(ctx->opcode);
     sh = SH(ctx->opcode);
-    if (likely(sh == 0 && mb == 0 && me == 31)) {
-#if defined(TARGET_PPC64)
-        tcg_gen_mov_i64(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
-#else
-        tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
-#endif
+    if (likely(sh == (31-me) && mb <= me)) {
+        tcg_gen_deposit_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
+                           cpu_gpr[rS(ctx->opcode)], sh, me - mb + 1);
     } else {
         target_ulong mask;
         TCGv t1;
commit 439ce1401bac1687c711cb6acf4ee8f3f457c05e
Author: Anton Blanchard <anton at samba.org>
Date:   Fri Aug 22 11:50:57 2014 +1000

    spapr-vlan: Don't touch last entry in buffer list
    
    The last 8 bytes of the buffer list is defined to contain the number
    of dropped frames. At the moment we use it to store rx entries,
    which trips up ethtool -S:
    
    rx_no_buffer: 9223380832981355136
    
    Fix this by skipping the last buffer list entry.
    
    Signed-off-by: Anton Blanchard <anton at samba.org>
    Reviewed-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c
index 2d47df6..23c47d3 100644
--- a/hw/net/spapr_llan.c
+++ b/hw/net/spapr_llan.c
@@ -72,7 +72,14 @@ typedef uint64_t vlan_bd_t;
 #define VLAN_RXQ_BD_OFF      0
 #define VLAN_FILTER_BD_OFF   8
 #define VLAN_RX_BDS_OFF      16
-#define VLAN_MAX_BUFS        ((SPAPR_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF) / 8)
+/*
+ * The final 8 bytes of the buffer list is a counter of frames dropped
+ * because there was not a buffer in the buffer list capable of holding
+ * the frame. We must avoid it, or the operating system will report garbage
+ * for this statistic.
+ */
+#define VLAN_RX_BDS_LEN      (SPAPR_TCE_PAGE_SIZE - VLAN_RX_BDS_OFF - 8)
+#define VLAN_MAX_BUFS        (VLAN_RX_BDS_LEN / 8)
 
 #define TYPE_VIO_SPAPR_VLAN_DEVICE "spapr-vlan"
 #define VIO_SPAPR_VLAN_DEVICE(obj) \
@@ -119,7 +126,7 @@ static ssize_t spapr_vlan_receive(NetClientState *nc, const uint8_t *buf,
 
     do {
         buf_ptr += 8;
-        if (buf_ptr >= SPAPR_TCE_PAGE_SIZE) {
+        if (buf_ptr >= (VLAN_RX_BDS_LEN + VLAN_RX_BDS_OFF)) {
             buf_ptr = VLAN_RX_BDS_OFF;
         }
 
@@ -397,7 +404,7 @@ static target_ulong h_add_logical_lan_buffer(PowerPCCPU *cpu,
 
     do {
         dev->add_buf_ptr += 8;
-        if (dev->add_buf_ptr >= SPAPR_TCE_PAGE_SIZE) {
+        if (dev->add_buf_ptr >= (VLAN_RX_BDS_LEN + VLAN_RX_BDS_OFF)) {
             dev->add_buf_ptr = VLAN_RX_BDS_OFF;
         }
 
commit 32420522482ffc20f8e9423af4f41f4e05ce3a56
Author: Alexey Kardashevskiy <aik at ozlabs.ru>
Date:   Wed Aug 13 17:20:53 2014 +1000

    spapr_pci: Fix config space corruption
    
    When disabling MSI/MSIX via "ibm,change-msi" RTAS call, no check was made
    if MSI or MSIX is actually supported and the MSI message was reset
    unconditionally. If this happened on a device which does not support MSI
    (but does support MSIX, otherwise "ibm,change-msi" would not be called),
    this device would have PCIDevice::msi_cap field (MSI capability offset)
    set to zero and writing a vector would actually clear PCI status.
    
    This clears MSI message only if MSI or MSIX is present on a device.
    
    Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 9ed39a9..e894f07 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -262,7 +262,6 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     unsigned int irq, max_irqs = 0, num = 0;
     sPAPRPHBState *phb = NULL;
     PCIDevice *pdev = NULL;
-    bool msix = false;
     spapr_pci_msi *msi;
     int *config_addr_key;
 
@@ -300,7 +299,12 @@ static void rtas_ibm_change_msi(PowerPCCPU *cpu, sPAPREnvironment *spapr,
         }
 
         xics_free(spapr->icp, msi->first_irq, msi->num);
-        spapr_msi_setmsg(pdev, 0, msix, 0, num);
+        if (msi_present(pdev)) {
+            spapr_msi_setmsg(pdev, 0, false, 0, num);
+        }
+        if (msix_present(pdev)) {
+            spapr_msi_setmsg(pdev, 0, true, 0, num);
+        }
         g_hash_table_remove(phb->msi, &config_addr);
 
         trace_spapr_pci_msi("Released MSIs", config_addr);
commit b981289c493c7ddabc1cdf7de99daa24642c7739
Author: Alexander Graf <agraf at suse.de>
Date:   Sun Jul 13 22:31:53 2014 +0200

    PPC: Cuda: Use cuda timer to expose tbfreq to guest
    
    Mac OS X calibrates a number of frequencies on bootup based on reading
    tb values on bootup and comparing them to via cuda timer values.
    
    The only variable we can really steer well (thanks to KVM) is the cuda
    frequency. So let's use that one to fake Mac OS X into believing the
    bus frequency is tbfreq * 4. That way Mac OS X will automatically
    calculate the correct timebase frequency.
    
    With this patch and the patch set I posted earlier I can successfully
    run Mac OS X 10.2, 10.3 and 10.4 guests with -M mac99 on TCG and KVM.
    
    Suggested-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/misc/macio/cuda.c b/hw/misc/macio/cuda.c
index ff6051d..b4273aa 100644
--- a/hw/misc/macio/cuda.c
+++ b/hw/misc/macio/cuda.c
@@ -123,13 +123,22 @@ static void cuda_update_irq(CUDAState *s)
     }
 }
 
+static uint64_t get_tb(uint64_t freq)
+{
+    return muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL),
+                    freq, get_ticks_per_sec());
+}
+
 static unsigned int get_counter(CUDATimer *s)
 {
     int64_t d;
     unsigned int counter;
+    uint64_t tb_diff;
+
+    /* Reverse of the tb calculation algorithm that Mac OS X uses on bootup. */
+    tb_diff = get_tb(s->frequency) - s->load_time;
+    d = (tb_diff * 0xBF401675E5DULL) / (s->frequency << 24);
 
-    d = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - s->load_time,
-                 CUDA_TIMER_FREQ, get_ticks_per_sec());
     if (s->index == 0) {
         /* the timer goes down from latch to -1 (period of latch + 2) */
         if (d <= (s->counter_value + 1)) {
@@ -147,7 +156,7 @@ static unsigned int get_counter(CUDATimer *s)
 static void set_counter(CUDAState *s, CUDATimer *ti, unsigned int val)
 {
     CUDA_DPRINTF("T%d.counter=%d\n", 1 + (ti->timer == NULL), val);
-    ti->load_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    ti->load_time = get_tb(s->frequency);
     ti->counter_value = val;
     cuda_timer_update(s, ti, ti->load_time);
 }
@@ -688,6 +697,8 @@ static void cuda_realizefn(DeviceState *dev, Error **errp)
     struct tm tm;
 
     s->timers[0].timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, cuda_timer1, s);
+    s->timers[0].frequency = s->frequency;
+    s->timers[1].frequency = s->frequency;
 
     qemu_get_timedate(&tm, 0);
     s->tick_offset = (uint32_t)mktimegm(&tm) + RTC_OFFSET;
@@ -713,6 +724,11 @@ static void cuda_initfn(Object *obj)
                         DEVICE(obj), "adb.0");
 }
 
+static Property cuda_properties[] = {
+    DEFINE_PROP_UINT64("frequency", CUDAState, frequency, 0),
+    DEFINE_PROP_END_OF_LIST()
+};
+
 static void cuda_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
@@ -720,6 +736,7 @@ static void cuda_class_init(ObjectClass *oc, void *data)
     dc->realize = cuda_realizefn;
     dc->reset = cuda_reset;
     dc->vmsd = &vmstate_cuda;
+    dc->props = cuda_properties;
 }
 
 static const TypeInfo cuda_type_info = {
diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c
index 35eaa00..e0f1e88 100644
--- a/hw/misc/macio/macio.c
+++ b/hw/misc/macio/macio.c
@@ -42,6 +42,7 @@ typedef struct MacIOState
     void *dbdma;
     MemoryRegion *pic_mem;
     MemoryRegion *escc_mem;
+    uint64_t frequency;
 } MacIOState;
 
 #define OLDWORLD_MACIO(obj) \
@@ -351,12 +352,19 @@ static void macio_newworld_class_init(ObjectClass *oc, void *data)
     pdc->device_id = PCI_DEVICE_ID_APPLE_UNI_N_KEYL;
 }
 
+static Property macio_properties[] = {
+    DEFINE_PROP_UINT64("frequency", MacIOState, frequency, 0),
+    DEFINE_PROP_END_OF_LIST()
+};
+
 static void macio_class_init(ObjectClass *klass, void *data)
 {
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
 
     k->vendor_id = PCI_VENDOR_ID_APPLE;
     k->class_id = PCI_CLASS_OTHERS << 8;
+    dc->props = macio_properties;
 }
 
 static const TypeInfo macio_oldworld_type_info = {
@@ -403,6 +411,8 @@ void macio_init(PCIDevice *d,
     macio_state->escc_mem = escc_mem;
     /* Note: this code is strongly inspirated from the corresponding code
        in PearPC */
+    qdev_prop_set_uint64(DEVICE(&macio_state->cuda), "frequency",
+                         macio_state->frequency);
 
     qdev_init_nofail(DEVICE(d));
 }
diff --git a/hw/ppc/mac.h b/hw/ppc/mac.h
index 23536f4..aff2b9a 100644
--- a/hw/ppc/mac.h
+++ b/hw/ppc/mac.h
@@ -57,6 +57,7 @@ typedef struct CUDATimer {
     uint16_t counter_value;
     int64_t load_time;
     int64_t next_irq_time;
+    uint64_t frequency;
     QEMUTimer *timer;
 } CUDATimer;
 
@@ -97,6 +98,7 @@ typedef struct CUDAState {
     CUDATimer timers[2];
 
     uint32_t tick_offset;
+    uint64_t frequency;
 
     uint8_t last_b;
     uint8_t last_acr;
diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index d525247..8453bfa 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -395,6 +395,7 @@ static void ppc_core99_init(MachineState *machine)
     qdev_connect_gpio_out(dev, 2, pic[0x02]); /* IDE DMA */
     qdev_connect_gpio_out(dev, 3, pic[0x0e]); /* IDE */
     qdev_connect_gpio_out(dev, 4, pic[0x03]); /* IDE DMA */
+    qdev_prop_set_uint64(dev, "frequency", tbfreq);
     macio_init(macio, pic_mem, escc_bar);
 
     /* We only emulate 2 out of 3 IDE controllers for now */
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index 863dd2f..630a9f9 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -286,6 +286,7 @@ static void ppc_heathrow_init(MachineState *machine)
     qdev_connect_gpio_out(dev, 2, pic[0x02]); /* IDE-0 DMA */
     qdev_connect_gpio_out(dev, 3, pic[0x0E]); /* IDE-1 */
     qdev_connect_gpio_out(dev, 4, pic[0x03]); /* IDE-1 DMA */
+    qdev_prop_set_uint64(dev, "frequency", tbfreq);
     macio_init(macio, pic_mem, escc_bar);
 
     macio_ide = MACIO_IDE(object_resolve_path_component(OBJECT(macio),
commit caae6c961107c4c55731a86572f9a1f53837636b
Author: Alexander Graf <agraf at suse.de>
Date:   Sun Jul 13 22:29:02 2014 +0200

    PPC: Mac: Move tbfreq into local variable
    
    We already expose the real CPU's tb frequency to the guest via fw_cfg. Soon
    we will need to also expose it to the MacIO, so let's move it to a variable
    that we can leverage every time we need the frequency.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 26067b4..d525247 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -177,6 +177,7 @@ static void ppc_core99_init(MachineState *machine)
     DeviceState *dev;
     int *token = g_new(int, 1);
     hwaddr nvram_addr = 0xFFF04000;
+    uint64_t tbfreq;
 
     linux_boot = (kernel_filename != NULL);
 
@@ -373,6 +374,14 @@ static void ppc_core99_init(MachineState *machine)
         pci_bus = pci_pmac_init(pic, get_system_memory(), get_system_io());
         machine_arch = ARCH_MAC99;
     }
+
+    /* Timebase Frequency */
+    if (kvm_enabled()) {
+        tbfreq = kvmppc_get_tbfreq();
+    } else {
+        tbfreq = TBFREQ;
+    }
+
     /* init basic PC hardware */
     escc_mem = escc_init(0, pic[0x25], pic[0x24],
                          serial_hds[0], serial_hds[1], ESCC_CLOCK, 4);
@@ -469,15 +478,13 @@ static void ppc_core99_init(MachineState *machine)
 #ifdef CONFIG_KVM
         uint8_t *hypercall;
 
-        fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, kvmppc_get_tbfreq());
         hypercall = g_malloc(16);
         kvmppc_get_hypercall(env, hypercall, 16);
         fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16);
         fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid());
 #endif
-    } else {
-        fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, TBFREQ);
     }
+    fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, tbfreq);
     /* Mac OS X requires a "known good" clock-frequency value; pass it one. */
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ);
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ);
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index ec7ed38..863dd2f 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -103,6 +103,7 @@ static void ppc_heathrow_init(MachineState *machine)
     uint16_t ppc_boot_device;
     DriveInfo *hd[MAX_IDE_BUS * MAX_IDE_DEVS];
     void *fw_cfg;
+    uint64_t tbfreq;
 
     linux_boot = (kernel_filename != NULL);
 
@@ -250,6 +251,13 @@ static void ppc_heathrow_init(MachineState *machine)
         }
     }
 
+    /* Timebase Frequency */
+    if (kvm_enabled()) {
+        tbfreq = kvmppc_get_tbfreq();
+    } else {
+        tbfreq = TBFREQ;
+    }
+
     /* init basic PC hardware */
     if (PPC_INPUT(env) != PPC_FLAGS_INPUT_6xx) {
         hw_error("Only 6xx bus is supported on heathrow machine\n");
@@ -330,15 +338,13 @@ static void ppc_heathrow_init(MachineState *machine)
 #ifdef CONFIG_KVM
         uint8_t *hypercall;
 
-        fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, kvmppc_get_tbfreq());
         hypercall = g_malloc(16);
         kvmppc_get_hypercall(env, hypercall, 16);
         fw_cfg_add_bytes(fw_cfg, FW_CFG_PPC_KVM_HC, hypercall, 16);
         fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_KVM_PID, getpid());
 #endif
-    } else {
-        fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, TBFREQ);
     }
+    fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_TBFREQ, tbfreq);
     /* Mac OS X requires a "known good" clock-frequency value; pass it one. */
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ);
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ);
commit 2d9907a3332888e43bc73fe9b98a32f8de662526
Author: Alexander Graf <agraf at suse.de>
Date:   Sun Jul 13 17:09:55 2014 +0200

    PPC: mac_nvram: Split NVRAM into OF and OSX parts
    
    Mac OS X (at least with -M mac99) searches for a valid NVRAM partition
    of a special Apple type. If it can't find that partition in the first
    half of NVRAM, it will look at the second half.
    
    There are a few implications from this. The first is that we need to
    split NVRAM into 2 halves - one for Open Firmware use, the other one for
    Mac OS X. Without this split Mac OS X will just loop endlessly over the
    second half trying to find a partition.
    
    The other implication is that we should provide a specially crafted Mac
    OS X compatible NVRAM partition on the second half that Mac OS X can
    happily use as it sees fit.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c
index 7656951..d35f8a3 100644
--- a/hw/nvram/mac_nvram.c
+++ b/hw/nvram/mac_nvram.c
@@ -26,6 +26,7 @@
 #include "hw/nvram/openbios_firmware_abi.h"
 #include "sysemu/sysemu.h"
 #include "hw/ppc/mac.h"
+#include <zlib.h>
 
 /* debug NVR */
 //#define DEBUG_NVR
@@ -137,15 +138,16 @@ static void macio_nvram_register_types(void)
 }
 
 /* Set up a system OpenBIOS NVRAM partition */
-void pmac_format_nvram_partition (MacIONVRAMState *nvr, int len)
+static void pmac_format_nvram_partition_of(MacIONVRAMState *nvr, int off,
+                                           int len)
 {
     unsigned int i;
-    uint32_t start = 0, end;
+    uint32_t start = off, end;
     struct OpenBIOS_nvpart_v1 *part_header;
 
     // OpenBIOS nvram variables
     // Variable partition
-    part_header = (struct OpenBIOS_nvpart_v1 *)nvr->data;
+    part_header = (struct OpenBIOS_nvpart_v1 *)&nvr->data[start];
     part_header->signature = OPENBIOS_PART_SYSTEM;
     pstrcpy(part_header->name, sizeof(part_header->name), "system");
 
@@ -173,4 +175,39 @@ void pmac_format_nvram_partition (MacIONVRAMState *nvr, int len)
     OpenBIOS_finish_partition(part_header, end - start);
 }
 
+#define OSX_NVRAM_SIGNATURE     (0x5A)
+
+/* Set up a Mac OS X NVRAM partition */
+static void pmac_format_nvram_partition_osx(MacIONVRAMState *nvr, int off,
+                                            int len)
+{
+    uint32_t start = off;
+    struct OpenBIOS_nvpart_v1 *part_header;
+    unsigned char *data = &nvr->data[start];
+
+    /* empty partition */
+    part_header = (struct OpenBIOS_nvpart_v1 *)data;
+    part_header->signature = OSX_NVRAM_SIGNATURE;
+    pstrcpy(part_header->name, sizeof(part_header->name), "wwwwwwwwwwww");
+
+    OpenBIOS_finish_partition(part_header, len);
+
+    /* Generation */
+    stl_be_p(&data[20], 2);
+
+    /* Adler32 checksum */
+    stl_be_p(&data[16], adler32(0, &data[20], len - 20));
+}
+
+/* Set up NVRAM with OF and OSX partitions */
+void pmac_format_nvram_partition(MacIONVRAMState *nvr, int len)
+{
+    /*
+     * Mac OS X expects side "B" of the flash at the second half of NVRAM,
+     * so we use half of the chip for OF and the other half for a free OSX
+     * partition.
+     */
+    pmac_format_nvram_partition_of(nvr, 0, len / 2);
+    pmac_format_nvram_partition_osx(nvr, len / 2, len / 2);
+}
 type_init(macio_nvram_register_types)
commit b19eae18c1cdf053fd85a39902cf77d8b561ef76
Author: Alexander Graf <agraf at suse.de>
Date:   Sun Jul 13 16:55:53 2014 +0200

    PPC: mac_nvram: Allow 2 and 4 byte accesses
    
    The NVRAM in our Core99 machine really supports 2byte and 4byte accesses
    just as well as 1byte accesses. In fact, Mac OS X uses those.
    
    Add support for higher register size granularities.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c
index bcff074..7656951 100644
--- a/hw/nvram/mac_nvram.c
+++ b/hw/nvram/mac_nvram.c
@@ -66,6 +66,10 @@ static uint64_t macio_nvram_readb(void *opaque, hwaddr addr,
 static const MemoryRegionOps macio_nvram_ops = {
     .read = macio_nvram_readb,
     .write = macio_nvram_writeb,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 1,
     .endianness = DEVICE_BIG_ENDIAN,
 };
 
commit a8b0503701ed8de9353834b0955260f4d9f08640
Author: Alexander Graf <agraf at suse.de>
Date:   Sun Jul 13 16:50:39 2014 +0200

    PPC: mac_nvram: Remove unused functions
    
    The macio_nvram_read and macio_nvram_write functions are never called,
    just remove them.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/nvram/mac_nvram.c b/hw/nvram/mac_nvram.c
index 170b10b..bcff074 100644
--- a/hw/nvram/mac_nvram.c
+++ b/hw/nvram/mac_nvram.c
@@ -39,29 +39,6 @@
 
 #define DEF_SYSTEM_SIZE 0xc10
 
-/* Direct access to NVRAM */
-uint8_t macio_nvram_read(MacIONVRAMState *s, uint32_t addr)
-{
-    uint32_t ret;
-
-    if (addr < s->size) {
-        ret = s->data[addr];
-    } else {
-        ret = -1;
-    }
-    NVR_DPRINTF("read addr %04" PRIx32 " val %" PRIx8 "\n", addr, ret);
-
-    return ret;
-}
-
-void macio_nvram_write(MacIONVRAMState *s, uint32_t addr, uint8_t val)
-{
-    NVR_DPRINTF("write addr %04" PRIx32 " val %" PRIx8 "\n", addr, val);
-    if (addr < s->size) {
-        s->data[addr] = val;
-    }
-}
-
 /* macio style NVRAM device */
 static void macio_nvram_writeb(void *opaque, hwaddr addr,
                                uint64_t value, unsigned size)
diff --git a/hw/ppc/mac.h b/hw/ppc/mac.h
index c1faf9c..23536f4 100644
--- a/hw/ppc/mac.h
+++ b/hw/ppc/mac.h
@@ -178,6 +178,4 @@ typedef struct MacIONVRAMState {
 } MacIONVRAMState;
 
 void pmac_format_nvram_partition (MacIONVRAMState *nvr, int len);
-uint8_t macio_nvram_read(MacIONVRAMState *s, uint32_t addr);
-void macio_nvram_write(MacIONVRAMState *s, uint32_t addr, uint8_t val);
 #endif /* !defined(__PPC_MAC_H__) */
commit d696760b43ca46c070f74fe12d90f38904232467
Author: Alexander Graf <agraf at suse.de>
Date:   Sun Jul 13 16:45:46 2014 +0200

    PPC: mac99: Fix core99 timer frequency
    
    There is a special timer in the mac99 machine that we recently started
    to emulate. Unfortunately we emulated it in the wrong frequency.
    
    This patch adapts the frequency Mac OS X uses to evaluate results from
    this timer, making calculations it bases off of it work.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/misc/macio/macio.c b/hw/misc/macio/macio.c
index 47f45f5..35eaa00 100644
--- a/hw/misc/macio/macio.c
+++ b/hw/misc/macio/macio.c
@@ -243,13 +243,18 @@ static void timer_write(void *opaque, hwaddr addr, uint64_t value,
 static uint64_t timer_read(void *opaque, hwaddr addr, unsigned size)
 {
     uint32_t value = 0;
+    uint64_t systime = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+    uint64_t kltime;
+
+    kltime = muldiv64(systime, 4194300, get_ticks_per_sec() * 4);
+    kltime = muldiv64(kltime, 18432000, 1048575);
 
     switch (addr) {
     case 0x38:
-        value = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+        value = kltime;
         break;
     case 0x3c:
-        value = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) >> 32;
+        value = kltime >> 32;
         break;
     }
 
commit 6fd33a750214a866772dd77573cfa24c27ad956d
Author: Alexander Graf <agraf at suse.de>
Date:   Mon Jul 14 19:17:35 2014 +0200

    PPC: KVM: Use vm check_extension for pv hcall
    
    To find out whether we support the KVM hypercall interface we need to ask KVM
    on the VM level rather than the global KVM level, because Book3S HV KVM does
    not support it and we play conservative when both HV and PR are loaded.
    
    So instead, use the VM helper that falls back to global KVM enumeration. That
    should cover all cases.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 2b6fee6..9c23c6b 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -1835,7 +1835,7 @@ static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
      PowerPCCPU *cpu = ppc_env_get_cpu(env);
      CPUState *cs = CPU(cpu);
 
-    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
+    if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
         return 0;
     }
commit 7d0a07fa926436baf1238dcf68a55ea96cf5b9ab
Author: Alexander Graf <agraf at suse.de>
Date:   Mon Jul 14 19:15:15 2014 +0200

    KVM: Add helper to run KVM_CHECK_EXTENSION on vm fd
    
    We now can call KVM_CHECK_EXTENSION on the kvm fd or on the vm fd, whereas
    the vm version is more accurate when it comes to PPC KVM.
    
    Add a helper to make the vm version available that falls back to the non-vm
    variant if the vm one is not available yet to stay compatible.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 174ea36..d2000af 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -303,6 +303,8 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cpu);
 
 int kvm_check_extension(KVMState *s, unsigned int extension);
 
+int kvm_vm_check_extension(KVMState *s, unsigned int extension);
+
 #define kvm_vm_enable_cap(s, capability, cap_flags, ...)             \
     ({                                                               \
         struct kvm_enable_cap cap = {                                \
diff --git a/kvm-all.c b/kvm-all.c
index b240bf8..b1cf703 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -493,6 +493,19 @@ int kvm_check_extension(KVMState *s, unsigned int extension)
     return ret;
 }
 
+int kvm_vm_check_extension(KVMState *s, unsigned int extension)
+{
+    int ret;
+
+    ret = kvm_vm_ioctl(s, KVM_CHECK_EXTENSION, extension);
+    if (ret < 0) {
+        /* VM wide version not implemented, use global one instead */
+        ret = kvm_check_extension(s, extension);
+    }
+
+    return ret;
+}
+
 static int kvm_set_ioeventfd_mmio(int fd, hwaddr addr, uint32_t val,
                                   bool assign, uint32_t size, bool datamatch)
 {
commit 4bc02e230d1e0fd41d2a892d81dcad56e3b3702d
Author: Tom Musta <tommusta at gmail.com>
Date:   Tue Aug 12 08:45:10 2014 -0500

    target-ppc: Bug Fix: srad
    
    Fix the check for carry in the srad helper to properly construct
    the mask -- a "1ULL" must be used (instead of "1") in order to
    get the desired result.
    
    Example:
    
    R3 8000000000000000
    R4 F3511AD4A2CD4C38
    srad 3,3,4
    
    Should *not* set XER[CA] but does without this patch.
    
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index e83a25d..e5b103b 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -248,7 +248,7 @@ target_ulong helper_srad(CPUPPCState *env, target_ulong value,
         if (likely((uint64_t)shift != 0)) {
             shift &= 0x3f;
             ret = (int64_t)value >> shift;
-            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
+            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                 env->ca = 0;
             } else {
                 env->ca = 1;
commit 34a0fad10210a3e639a8e68323c923494047eefc
Author: Tom Musta <tommusta at gmail.com>
Date:   Tue Aug 12 08:45:09 2014 -0500

    target-ppc: Bug Fix: srawi
    
    For 64 bit implementations, the special case of a shift by zero
    should result in the sign extension of the least significant 32 bits
    of the source GPR (not a direct copy of the 64 bit source GPR).
    
    Example:
    
    R3 A6212433228F41DC
    srawi 3,3,0
    R3 expected : 00000000228F41DC
    R3 actual   : A6212433228F41DC (without this patch)
    
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index b19eb14..47dc903 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1944,7 +1944,7 @@ static void gen_srawi(DisasContext *ctx)
     TCGv dst = cpu_gpr[rA(ctx->opcode)];
     TCGv src = cpu_gpr[rS(ctx->opcode)];
     if (sh == 0) {
-        tcg_gen_mov_tl(dst, src);
+        tcg_gen_ext32s_tl(dst, src);
         tcg_gen_movi_tl(cpu_ca, 0);
     } else {
         TCGv t0;
commit 9824d01d5d789a57d27360c0f5e8ee44955eb1d7
Author: Tom Musta <tommusta at gmail.com>
Date:   Tue Aug 12 08:45:08 2014 -0500

    target-ppc: Bug Fix: mulldo OV Detection
    
    Fix the code to properly detect overflow; the 128 bit signed
    product must have all zeroes or all ones in the first 65 bits
    otherwise OV should be set.
    
    Example:
    
    R3 45F086A5D5887509
    R4 0000000000000002
    mulldo 3,3,4
    
    Should set XER[OV].
    
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index f6e8846..e83a25d 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -32,12 +32,22 @@ uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
     uint64_t tl;
 
     muls64(&tl, (uint64_t *)&th, arg1, arg2);
-    /* If th != 0 && th != -1, then we had an overflow */
-    if (likely((uint64_t)(th + 1) <= 1)) {
+
+    /* th should either contain all 1 bits or all 0 bits and should
+     * match the sign bit of tl; otherwise we have overflowed. */
+
+    if ((int64_t)tl < 0) {
+        if (likely(th == -1LL)) {
+            env->ov = 0;
+        } else {
+            env->so = env->ov = 1;
+        }
+    } else if (likely(th == 0LL)) {
         env->ov = 0;
     } else {
         env->so = env->ov = 1;
     }
+
     return (int64_t)tl;
 }
 #endif
commit 1fa74845f2bab36bfa37108b9054b53c1b8264b9
Author: Tom Musta <tommusta at gmail.com>
Date:   Tue Aug 12 08:45:06 2014 -0500

    target-ppc: Bug Fix: mullw
    
    For 64-bit implementations, the mullw result is the 64 bit product
    of the sign-extended least significant 32 bits of the source
    registers.
    
    Fix the code to properly sign extend the source operands and produce
    a 64 bit product.
    
    Example:
    R3 00000000002F37A0
    R4 41C33D242F816715
    mullw 3,3,4
    R3 expected : 0008C3146AE0F020
    R3 actual   : 000000006AE0F020 (without this patch)
    
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index dc80b02..b19eb14 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1128,9 +1128,20 @@ static void gen_mulhwu(DisasContext *ctx)
 /* mullw  mullw. */
 static void gen_mullw(DisasContext *ctx)
 {
+#if defined(TARGET_PPC64)
+    TCGv_i64 t0, t1;
+    t0 = tcg_temp_new_i64();
+    t1 = tcg_temp_new_i64();
+    tcg_gen_ext32s_tl(t0, cpu_gpr[rA(ctx->opcode)]);
+    tcg_gen_ext32s_tl(t1, cpu_gpr[rB(ctx->opcode)]);
+    tcg_gen_mul_i64(cpu_gpr[rD(ctx->opcode)], t0, t1);
+    tcg_temp_free(t0);
+    tcg_temp_free(t1);
+#else
     tcg_gen_mul_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)],
                    cpu_gpr[rB(ctx->opcode)]);
     tcg_gen_ext32s_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rD(ctx->opcode)]);
+#endif
     if (unlikely(Rc(ctx->opcode) != 0))
         gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]);
 }
commit f11ebbf8d4308795129bc6651cf701b61b812abf
Author: Tom Musta <tommusta at gmail.com>
Date:   Tue Aug 12 08:45:07 2014 -0500

    target-ppc: Bug Fix: mullwo
    
    On 64-bit implementations, the mullwo result is the 64 bit product of
    the signed 32 bit operands.  Fix the implementation to properly deposit
    the upper 32 bits into the target register.
    
    Example:
    
    R3 0407DED115077586
    R4 53778DF3CA992E09
    mullwo 3,3,4
    R3 expected : FB9D02730D7735B6
    R3 actual   : 000000000D7735B6 (without this patch)
    
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index fab4f01..dc80b02 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1140,11 +1140,20 @@ static void gen_mullwo(DisasContext *ctx)
 {
     TCGv_i32 t0 = tcg_temp_new_i32();
     TCGv_i32 t1 = tcg_temp_new_i32();
+#if defined(TARGET_PPC64)
+    TCGv_i64 t2 = tcg_temp_new_i64();
+#endif
 
     tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
     tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
     tcg_gen_muls2_i32(t0, t1, t0, t1);
     tcg_gen_ext_i32_tl(cpu_gpr[rD(ctx->opcode)], t0);
+#if defined(TARGET_PPC64)
+    tcg_gen_ext_i32_tl(t2, t1);
+    tcg_gen_deposit_i64(cpu_gpr[rD(ctx->opcode)],
+                        cpu_gpr[rD(ctx->opcode)], t2, 32, 32);
+    tcg_temp_free(t2);
+#endif
 
     tcg_gen_sari_i32(t0, t0, 31);
     tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1);
commit 6ea7b35c0294b1cc462e3225c4672de31300ed79
Author: Tom Musta <tommusta at gmail.com>
Date:   Tue Aug 12 08:45:05 2014 -0500

    target-ppc: Bug Fix: rlwimi
    
    The rlwimi specification includes the ROTL32 operation, which is defined
    to be a left rotation of two copies of the least significant 32 bits of
    the source GPR.
    
    The current implementation is incorrect on 64-bit implementations in that
    it rotates a single copy of the least significant 32 bits, padding with
    zeroes in the most significant bits.
    
    Fix the code to properly implement this ROTL32 operation.
    
    Also fix the special case of MB=31 and ME=0 to copy the entire contents
    of the source GPR.
    
    Examples:
    
    R3 FFFFFFFFFFFFFFF0
    rlwimi 3,3,29,14,1
    R3 expected : 1FFFFFFE3FFFFFFE
    R3 actual   : 000000003FFFFFFE (without this patch)
    
    R3 ED7EB4DD824F0853
    rlwimi 3,3,10,31,0
    R3 expected : 3C214E09024F0853
    R3 actual   : 00000000024F0853 (without this patch)
    
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index be7d40b..fab4f01 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1617,17 +1617,19 @@ static void gen_rlwimi(DisasContext *ctx)
     me = ME(ctx->opcode);
     sh = SH(ctx->opcode);
     if (likely(sh == 0 && mb == 0 && me == 31)) {
+#if defined(TARGET_PPC64)
+        tcg_gen_mov_i64(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
+#else
         tcg_gen_ext32u_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]);
+#endif
     } else {
         target_ulong mask;
         TCGv t1;
         TCGv t0 = tcg_temp_new();
 #if defined(TARGET_PPC64)
-        TCGv_i32 t2 = tcg_temp_new_i32();
-        tcg_gen_trunc_i64_i32(t2, cpu_gpr[rS(ctx->opcode)]);
-        tcg_gen_rotli_i32(t2, t2, sh);
-        tcg_gen_extu_i32_i64(t0, t2);
-        tcg_temp_free_i32(t2);
+        tcg_gen_deposit_i64(t0, cpu_gpr[rS(ctx->opcode)],
+            cpu_gpr[rS(ctx->opcode)], 32, 32);
+        tcg_gen_rotli_i64(t0, t0, sh);
 #else
         tcg_gen_rotli_i32(t0, cpu_gpr[rS(ctx->opcode)], sh);
 #endif
commit 1c0a150f4bb60ce9af7b4b10f0deabdea1b22f93
Author: Tom Musta <tommusta at gmail.com>
Date:   Tue Aug 12 08:45:04 2014 -0500

    target-ppc: Bug Fix: rlwnm
    
    The rlwnm specification includes the ROTL32 operation, which is defined
    to be a left rotation of two copies of the least significant 32 bits of
    the source GPR.
    
    The current implementation is incorrect on 64-bit implementations in that
    it rotates a single copy of the least significant 32 bits, padding with
    zeroes in the most significant bits.
    
    Fix the code to properly implement this ROTL32 operation.
    
    Example:
    
    R3 = 0000000000000002
    R4 = 7FFFFFFFFFFFFFFF
    rlwnm 3,3,4,31,16
    R3 expected : 0000000100000001
    R3 actual   : 0000000000000001 (without this patch)
    
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 44a8e1e..be7d40b 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1698,7 +1698,7 @@ static void gen_rlwnm(DisasContext *ctx)
     uint32_t mb, me;
     TCGv t0;
 #if defined(TARGET_PPC64)
-    TCGv_i32 t1, t2;
+    TCGv t1;
 #endif
 
     mb = MB(ctx->opcode);
@@ -1706,14 +1706,11 @@ static void gen_rlwnm(DisasContext *ctx)
     t0 = tcg_temp_new();
     tcg_gen_andi_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x1f);
 #if defined(TARGET_PPC64)
-    t1 = tcg_temp_new_i32();
-    t2 = tcg_temp_new_i32();
-    tcg_gen_trunc_i64_i32(t1, cpu_gpr[rS(ctx->opcode)]);
-    tcg_gen_trunc_i64_i32(t2, t0);
-    tcg_gen_rotl_i32(t1, t1, t2);
-    tcg_gen_extu_i32_i64(t0, t1);
-    tcg_temp_free_i32(t1);
-    tcg_temp_free_i32(t2);
+    t1 = tcg_temp_new_i64();
+    tcg_gen_deposit_i64(t1, cpu_gpr[rS(ctx->opcode)],
+        cpu_gpr[rS(ctx->opcode)], 32, 32);
+    tcg_gen_rotl_i64(t0, t1, t0);
+    tcg_temp_free_i64(t1);
 #else
     tcg_gen_rotl_i32(t0, cpu_gpr[rS(ctx->opcode)], t0);
 #endif
@@ -1724,6 +1721,9 @@ static void gen_rlwnm(DisasContext *ctx)
 #endif
         tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], t0, MASK(mb, me));
     } else {
+#if defined(TARGET_PPC64)
+        tcg_gen_andi_tl(t0, t0, MASK(32, 63));
+#endif
         tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], t0);
     }
     tcg_temp_free(t0);
commit a7f23d0f8bfbe76864a6427c0e21fe794ab9b7ef
Author: Tom Musta <tommusta at gmail.com>
Date:   Tue Aug 12 08:45:03 2014 -0500

    target-ppc: Bug Fix: rlwinm
    
    The rlwinm specification includes the ROTL32 operation, which is defined
    to be a left rotation of two copies of the least significant 32 bits of
    the source GPR.
    
    The current implementation is incorrect on 64-bit implementations in that
    it rotates a single copy of the least significant 32 bits, padding with
    zeroes in the most significant bits.
    
    Fix the code to properly implement this ROTL32 operation.
    
    Example:
    R3 = F7487D82EC6F75DF
    rlwinm 3,3,5,12,4
    
    R3 expected : 8DEEBBFD880EBBFD
    R3 actual   : 00000000880EBBFD (without this fix)
    
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index c07bb01..44a8e1e 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1675,11 +1675,9 @@ static void gen_rlwinm(DisasContext *ctx)
     } else {
         TCGv t0 = tcg_temp_new();
 #if defined(TARGET_PPC64)
-        TCGv_i32 t1 = tcg_temp_new_i32();
-        tcg_gen_trunc_i64_i32(t1, cpu_gpr[rS(ctx->opcode)]);
-        tcg_gen_rotli_i32(t1, t1, sh);
-        tcg_gen_extu_i32_i64(t0, t1);
-        tcg_temp_free_i32(t1);
+        tcg_gen_deposit_i64(t0, cpu_gpr[rS(ctx->opcode)],
+            cpu_gpr[rS(ctx->opcode)], 32, 32);
+        tcg_gen_rotli_i64(t0, t0, sh);
 #else
         tcg_gen_rotli_i32(t0, cpu_gpr[rS(ctx->opcode)], sh);
 #endif
commit 9674a356267ee9cf8230775f88c90c299a4affc9
Author: Nikunj A Dadhania <nikunj at linux.vnet.ibm.com>
Date:   Fri Jun 27 12:17:38 2014 +0530

    ppc/spapr: Fix MAX_CPUS to 255
    
    MAX_CPUS 256 is inconsistent with qemu supporting upto 255 cpus. This
    MAX_CPUS number was percolated back to "virsh capabilities" with wrong
    max_cpus.
    
    Signed-off-by: Nikunj A Dadhania <nikunj at linux.vnet.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 2f16d9d..555a007 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -81,7 +81,7 @@
 
 #define TIMEBASE_FREQ           512000000ULL
 
-#define MAX_CPUS                256
+#define MAX_CPUS                255
 
 #define PHANDLE_XICP            0x00001111
 
commit 88365d17d586bcf0d9f4432447db345f72278a2a
Author: Bharat Bhushan <Bharat.Bhushan at freescale.com>
Date:   Mon Jul 14 14:45:37 2014 +0530

    ppc: Add hw breakpoint watchpoint support
    
    This patch adds hardware breakpoint and hardware watchpoint support
    for ppc.
    
    On BOOKE architecture we cannot share debug resources between QEMU
    and guest because:
        When QEMU is using debug resources then debug exception must
        be always enabled. To achieve this we set MSR_DE and also set
        MSRP_DEP so guest cannot change MSR_DE.
    
        When emulating debug resource for guest we want guest
        to control MSR_DE (enable/disable debug interrupt on need).
    
        So above mentioned two configuration cannot be supported
        at the same time. So the result is that we cannot share
        debug resources between QEMU and Guest on BOOKE architecture.
    
    In the current design QEMU gets priority over guest,
    this means that if QEMU is using debug resources then guest
    cannot use them and if guest is using debug resource then
    qemu can overwrite them.
    
    When QEMU is not able to handle debug exception then we inject program
    exception to guest. Yes program exception NOT debug exception and the
    reason is:
     1) QEMU and guest not sharing debug resources
     2) For software breakpoint QEMU uses a ehpriv-1 instruction;
    
     So there cannot be any reason that we are in qemu with exit reason
     KVM_EXIT_DEBUG  for guest set debug exception, only possibility is
     guest executed ehpriv-1 privilege instruction and that's why we are
     injecting program exception.
    
    Signed-off-by: Bharat Bhushan <Bharat.Bhushan at freescale.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index c29ed65..2b6fee6 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -38,6 +38,7 @@
 #include "hw/ppc/ppc.h"
 #include "sysemu/watchdog.h"
 #include "trace.h"
+#include "exec/gdbstub.h"
 
 //#define DEBUG_KVM
 
@@ -412,6 +413,38 @@ unsigned long kvm_arch_vcpu_id(CPUState *cpu)
     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 }
 
+/* e500 supports 2 h/w breakpoint and 2 watchpoint.
+ * book3s supports only 1 watchpoint, so array size
+ * of 4 is sufficient for now.
+ */
+#define MAX_HW_BKPTS 4
+
+static struct HWBreakpoint {
+    target_ulong addr;
+    int type;
+} hw_debug_points[MAX_HW_BKPTS];
+
+static CPUWatchpoint hw_watchpoint;
+
+/* Default there is no breakpoint and watchpoint supported */
+static int max_hw_breakpoint;
+static int max_hw_watchpoint;
+static int nb_hw_breakpoint;
+static int nb_hw_watchpoint;
+
+static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
+{
+    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
+        max_hw_breakpoint = 2;
+        max_hw_watchpoint = 2;
+    }
+
+    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
+        fprintf(stderr, "Error initializing h/w breakpoints\n");
+        return;
+    }
+}
+
 int kvm_arch_init_vcpu(CPUState *cs)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -439,6 +472,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
     }
 
     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
+    kvmppc_hw_debug_points_init(cenv);
 
     return ret;
 }
@@ -1303,12 +1337,163 @@ int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
     return 0;
 }
 
+static int find_hw_breakpoint(target_ulong addr, int type)
+{
+    int n;
+
+    assert((nb_hw_breakpoint + nb_hw_watchpoint)
+           <= ARRAY_SIZE(hw_debug_points));
+
+    for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
+        if (hw_debug_points[n].addr == addr &&
+             hw_debug_points[n].type == type) {
+            return n;
+        }
+    }
+
+    return -1;
+}
+
+static int find_hw_watchpoint(target_ulong addr, int *flag)
+{
+    int n;
+
+    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
+    if (n >= 0) {
+        *flag = BP_MEM_ACCESS;
+        return n;
+    }
+
+    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
+    if (n >= 0) {
+        *flag = BP_MEM_WRITE;
+        return n;
+    }
+
+    n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
+    if (n >= 0) {
+        *flag = BP_MEM_READ;
+        return n;
+    }
+
+    return -1;
+}
+
+int kvm_arch_insert_hw_breakpoint(target_ulong addr,
+                                  target_ulong len, int type)
+{
+    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
+        return -ENOBUFS;
+    }
+
+    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
+    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
+
+    switch (type) {
+    case GDB_BREAKPOINT_HW:
+        if (nb_hw_breakpoint >= max_hw_breakpoint) {
+            return -ENOBUFS;
+        }
+
+        if (find_hw_breakpoint(addr, type) >= 0) {
+            return -EEXIST;
+        }
+
+        nb_hw_breakpoint++;
+        break;
+
+    case GDB_WATCHPOINT_WRITE:
+    case GDB_WATCHPOINT_READ:
+    case GDB_WATCHPOINT_ACCESS:
+        if (nb_hw_watchpoint >= max_hw_watchpoint) {
+            return -ENOBUFS;
+        }
+
+        if (find_hw_breakpoint(addr, type) >= 0) {
+            return -EEXIST;
+        }
+
+        nb_hw_watchpoint++;
+        break;
+
+    default:
+        return -ENOSYS;
+    }
+
+    return 0;
+}
+
+int kvm_arch_remove_hw_breakpoint(target_ulong addr,
+                                  target_ulong len, int type)
+{
+    int n;
+
+    n = find_hw_breakpoint(addr, type);
+    if (n < 0) {
+        return -ENOENT;
+    }
+
+    switch (type) {
+    case GDB_BREAKPOINT_HW:
+        nb_hw_breakpoint--;
+        break;
+
+    case GDB_WATCHPOINT_WRITE:
+    case GDB_WATCHPOINT_READ:
+    case GDB_WATCHPOINT_ACCESS:
+        nb_hw_watchpoint--;
+        break;
+
+    default:
+        return -ENOSYS;
+    }
+    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
+
+    return 0;
+}
+
+void kvm_arch_remove_all_hw_breakpoints(void)
+{
+    nb_hw_breakpoint = nb_hw_watchpoint = 0;
+}
+
 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
 {
+    int n;
+
     /* Software Breakpoint updates */
     if (kvm_sw_breakpoints_active(cs)) {
         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
     }
+
+    assert((nb_hw_breakpoint + nb_hw_watchpoint)
+           <= ARRAY_SIZE(hw_debug_points));
+    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
+
+    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
+        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
+        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
+            switch (hw_debug_points[n].type) {
+            case GDB_BREAKPOINT_HW:
+                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
+                break;
+            case GDB_WATCHPOINT_WRITE:
+                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
+                break;
+            case GDB_WATCHPOINT_READ:
+                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
+                break;
+            case GDB_WATCHPOINT_ACCESS:
+                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
+                                        KVMPPC_DEBUG_WATCH_READ;
+                break;
+            default:
+                cpu_abort(cs, "Unsupported breakpoint type\n");
+            }
+            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
+        }
+    }
 }
 
 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
@@ -1317,13 +1502,46 @@ static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
     CPUPPCState *env = &cpu->env;
     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
     int handle = 0;
+    int n;
+    int flag = 0;
 
-    if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
+    if (cs->singlestep_enabled) {
+        handle = 1;
+    } else if (arch_info->status) {
+        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
+            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
+                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
+                if (n >= 0) {
+                    handle = 1;
+                }
+            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
+                                            KVMPPC_DEBUG_WATCH_WRITE)) {
+                n = find_hw_watchpoint(arch_info->address,  &flag);
+                if (n >= 0) {
+                    handle = 1;
+                    cs->watchpoint_hit = &hw_watchpoint;
+                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
+                    hw_watchpoint.flags = flag;
+                }
+            }
+        }
+    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
         handle = 1;
     } else {
         /* QEMU is not able to handle debug exception, so inject
          * program exception to guest;
          * Yes program exception NOT debug exception !!
+         * When QEMU is using debug resources then debug exception must
+         * be always set. To achieve this we set MSR_DE and also set
+         * MSRP_DEP so guest cannot change MSR_DE.
+         * When emulating debug resource for guest we want guest
+         * to control MSR_DE (enable/disable debug interrupt on need).
+         * Supporting both configurations are NOT possible.
+         * So the result is that we cannot share debug resources
+         * between QEMU and Guest on BOOKE architecture.
+         * In the current design QEMU gets the priority over guest,
+         * this means that if QEMU is using debug resources then guest
+         * cannot use them;
          * For software breakpoint QEMU uses a privileged instruction;
          * So there cannot be any reason that we are here for guest
          * set debug exception, only possibility is guest executed a
@@ -2086,20 +2304,6 @@ void kvm_arch_init_irq_routing(KVMState *s)
 {
 }
 
-int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
-{
-    return -EINVAL;
-}
-
-int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
-{
-    return -EINVAL;
-}
-
-void kvm_arch_remove_all_hw_breakpoints(void)
-{
-}
-
 struct kvm_get_htab_buf {
     struct kvm_get_htab_header header;
     /*
commit 8a0548f94edecb96acb9b7fb9106ccc821c4996f
Author: Bharat Bhushan <Bharat.Bhushan at freescale.com>
Date:   Mon Jul 14 14:45:38 2014 +0530

    ppc: Add software breakpoint support
    
    This patch allow insert/remove software breakpoint.
    
    When QEMU is not able to handle debug exception then we inject
    program exception to guest because for software breakpoint QEMU
    uses a ehpriv-1 instruction;
    So there cannot be any reason that we are in qemu with exit reason
    KVM_EXIT_DEBUG  for guest set debug exception, only possibility is
    guest executed ehpriv-1 privilege instruction and that's why we are
    injecting program exception.
    
    Signed-off-by: Bharat Bhushan <Bharat.Bhushan at freescale.com>
    [agraf: make deflect comment booke/book3s agnostic]
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 05d2ac8..c29ed65 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -1275,6 +1275,75 @@ static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t dat
     return 0;
 }
 
+int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
+{
+    /* Mixed endian case is not handled */
+    uint32_t sc = debug_inst_opcode;
+
+    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
+                            sizeof(sc), 0) ||
+        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
+{
+    uint32_t sc;
+
+    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
+        sc != debug_inst_opcode ||
+        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
+                            sizeof(sc), 1)) {
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
+{
+    /* Software Breakpoint updates */
+    if (kvm_sw_breakpoints_active(cs)) {
+        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
+    }
+}
+
+static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
+{
+    CPUState *cs = CPU(cpu);
+    CPUPPCState *env = &cpu->env;
+    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
+    int handle = 0;
+
+    if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
+        handle = 1;
+    } else {
+        /* QEMU is not able to handle debug exception, so inject
+         * program exception to guest;
+         * Yes program exception NOT debug exception !!
+         * For software breakpoint QEMU uses a privileged instruction;
+         * So there cannot be any reason that we are here for guest
+         * set debug exception, only possibility is guest executed a
+         * privileged / illegal instruction and that's why we are
+         * injecting a program interrupt.
+         */
+
+        cpu_synchronize_state(cs);
+        /* env->nip is PC, so increment this by 4 to use
+         * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
+         */
+        env->nip += 4;
+        cs->exception_index = POWERPC_EXCP_PROGRAM;
+        env->error_code = POWERPC_EXCP_INVAL;
+        ppc_cpu_do_interrupt(cs);
+    }
+
+    return handle;
+}
+
 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -1315,6 +1384,16 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
         ret = 0;
         break;
 
+    case KVM_EXIT_DEBUG:
+        DPRINTF("handle debug exception\n");
+        if (kvm_handle_debug(cpu, run)) {
+            ret = EXCP_DEBUG;
+            break;
+        }
+        /* re-enter, this exception was guest-internal */
+        ret = 0;
+        break;
+
     default:
         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
         ret = -1;
@@ -2007,16 +2086,6 @@ void kvm_arch_init_irq_routing(KVMState *s)
 {
 }
 
-int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
-{
-    return -EINVAL;
-}
-
-int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
-{
-    return -EINVAL;
-}
-
 int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
 {
     return -EINVAL;
@@ -2031,10 +2100,6 @@ void kvm_arch_remove_all_hw_breakpoints(void)
 {
 }
 
-void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
-{
-}
-
 struct kvm_get_htab_buf {
     struct kvm_get_htab_header header;
     /*
commit c371c2e3e0d7ad979dc2bb9763223287fabdcc24
Author: Bharat Bhushan <Bharat.Bhushan at freescale.com>
Date:   Mon Jul 14 14:45:36 2014 +0530

    ppc: synchronize excp_vectors for injecting exception
    
    This patch synchronizes env->excp_vectors[] with env->iovr[].
    This is required for using the existing interrupt injection mechanism
    for kvm.
    
    Signed-off-by: Bharat Bhushan <Bharat.Bhushan at freescale.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 126f7ee..05d2ac8 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -903,6 +903,11 @@ int kvm_arch_put_registers(CPUState *cs, int level)
     return ret;
 }
 
+static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
+{
+     env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
+}
+
 int kvm_arch_get_registers(CPUState *cs)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -985,35 +990,57 @@ int kvm_arch_get_registers(CPUState *cs)
 
         if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
             env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
+            kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
             env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
+            kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
             env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
+            kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
             env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
+            kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
             env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
+            kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
             env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
+            kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
             env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
+            kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
             env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
+            kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
             env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
+            kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
             env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
+            kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
             env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
+            kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
             env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
+            kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
             env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
+            kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
             env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
+            kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
             env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
+            kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
             env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
+            kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
 
             if (sregs.u.e.features & KVM_SREGS_E_SPE) {
                 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
+                kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
                 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
+                kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
                 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
+                kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
             }
 
             if (sregs.u.e.features & KVM_SREGS_E_PM) {
                 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
+                kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
             }
 
             if (sregs.u.e.features & KVM_SREGS_E_PC) {
                 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
+                kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
                 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
+                kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
             }
         }
 
commit 3c902d4469304a9fd78cbef8a927d44b847cde3f
Author: Bharat Bhushan <Bharat.Bhushan at freescale.com>
Date:   Mon Jul 14 14:45:35 2014 +0530

    ppc: debug stub: Get trap instruction opcode from KVM
    
    Get trap instruction opcode from KVM and this opcode will
    be used for setting software breakpoint in following patch
    
    Signed-off-by: Bharat Bhushan <Bharat.Bhushan at freescale.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 8c9e79c..126f7ee 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -72,6 +72,8 @@ static int cap_papr;
 static int cap_htab_fd;
 static int cap_fixup_hcalls;
 
+static uint32_t debug_inst_opcode;
+
 /* XXX We have a race condition where we actually have a level triggered
  *     interrupt, but the infrastructure can't expose that yet, so the guest
  *     takes but ignores it, goes to sleep and never gets notified that there's
@@ -436,6 +438,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
         break;
     }
 
+    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
+
     return ret;
 }
 
commit b7d1f77adaab790d20232df261d4e2ff6a77f556
Author: Benjamin Herrenschmidt <benh at kernel.crashing.org>
Date:   Mon Jul 21 13:02:04 2014 +1000

    spapr: Locate RTAS and device-tree based on real RMA
    
    We currently calculate the final RTAS and FDT location based on
    the early estimate of the RMA size, cropped to 256M on KVM since
    we only know the real RMA size at reset time which happens much
    later in the boot process.
    
    This means the FDT and RTAS end up right below 256M while they
    could be much higher, using precious RMA space and limiting
    what the OS bootloader can put there which has proved to be
    a problem with some OSes (such as when using very large initrd's)
    
    Fortunately, we do the actual copy of the device-tree into guest
    memory much later, during reset, late enough to be able to do it
    using the final RMA value, we just need to move the calculation
    to the right place.
    
    However, RTAS is still loaded too early, so we change the code to
    load the tiny blob into qemu memory early on, and then copy it into
    guest memory at reset time. It's small enough that the memory usage
    doesn't matter.
    
    Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
    [aik: fixed errors from checkpatch.pl, defined RTAS_MAX_ADDR]
    Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
    [agraf: fix compilation on 32bit hosts]
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 12dbf1b..2f16d9d 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -71,6 +71,7 @@
  */
 #define FDT_MAX_SIZE            0x40000
 #define RTAS_MAX_SIZE           0x10000
+#define RTAS_MAX_ADDR           0x80000000 /* RTAS must stay below that */
 #define FW_MAX_SIZE             0x400000
 #define FW_FILE_NAME            "slof.bin"
 #define FW_OVERHEAD             0x2800000
@@ -854,16 +855,30 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
 static void ppc_spapr_reset(void)
 {
     PowerPCCPU *first_ppc_cpu;
+    uint32_t rtas_limit;
 
     /* Reset the hash table & recalc the RMA */
     spapr_reset_htab(spapr);
 
     qemu_devices_reset();
 
+    /*
+     * We place the device tree and RTAS just below either the top of the RMA,
+     * or just below 2GB, whichever is lowere, so that it can be
+     * processed with 32-bit real mode code if necessary
+     */
+    rtas_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR);
+    spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
+    spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
+
     /* Load the fdt */
     spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
                        spapr->rtas_size);
 
+    /* Copy RTAS over */
+    cpu_physical_memory_write(spapr->rtas_addr, spapr->rtas_blob,
+                              spapr->rtas_size);
+
     /* Set up the entry state */
     first_ppc_cpu = POWERPC_CPU(first_cpu);
     first_ppc_cpu->env.gpr[3] = spapr->fdt_addr;
@@ -1283,7 +1298,7 @@ static void ppc_spapr_init(MachineState *machine)
     hwaddr node0_size = spapr_node0_size();
     uint32_t initrd_base = 0;
     long kernel_size = 0, initrd_size = 0;
-    long load_limit, rtas_limit, fw_size;
+    long load_limit, fw_size;
     bool kernel_le = false;
     char *filename;
 
@@ -1328,13 +1343,8 @@ static void ppc_spapr_init(MachineState *machine)
         exit(1);
     }
 
-    /* We place the device tree and RTAS just below either the top of the RMA,
-     * or just below 2GB, whichever is lowere, so that it can be
-     * processed with 32-bit real mode code if necessary */
-    rtas_limit = MIN(spapr->rma_size, 0x80000000);
-    spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
-    spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
-    load_limit = spapr->fdt_addr - FW_OVERHEAD;
+    /* Setup a load limit for the ramdisk leaving room for SLOF and FDT */
+    load_limit = MIN(spapr->rma_size, RTAS_MAX_ADDR) - FW_OVERHEAD;
 
     /* We aim for a hash table of size 1/128 the size of RAM.  The
      * normal rule of thumb is 1/64 the size of RAM, but that's much
@@ -1402,14 +1412,14 @@ static void ppc_spapr_init(MachineState *machine)
     }
 
     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
-    spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr,
-                                           rtas_limit - spapr->rtas_addr);
-    if (spapr->rtas_size < 0) {
+    spapr->rtas_size = get_image_size(filename);
+    spapr->rtas_blob = g_malloc(spapr->rtas_size);
+    if (load_image_size(filename, spapr->rtas_blob, spapr->rtas_size) < 0) {
         hw_error("qemu: could not load LPAR rtas '%s'\n", filename);
         exit(1);
     }
     if (spapr->rtas_size > RTAS_MAX_SIZE) {
-        hw_error("RTAS too big ! 0x%lx bytes (max is 0x%x)\n",
+        hw_error("RTAS too big ! 0x%zx bytes (max is 0x%x)\n",
                  spapr->rtas_size, RTAS_MAX_SIZE);
         exit(1);
     }
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 7fff979..36e8e51 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -24,7 +24,8 @@ typedef struct sPAPREnvironment {
     hwaddr rma_size;
     int vrma_adjust;
     hwaddr fdt_addr, rtas_addr;
-    long rtas_size;
+    ssize_t rtas_size;
+    void *rtas_blob;
     void *fdt_skel;
     target_ulong entry_point;
     uint64_t rtc_offset;
commit ea87616d6c44d998affef3d3b9fdfc49d14b8150
Author: Benjamin Herrenschmidt <benh at kernel.crashing.org>
Date:   Mon Jul 21 13:02:03 2014 +1000

    loader: Add load_image_size() to replace load_image()
    
    A subsequent patch to ppc/spapr needs to load the RTAS blob into
    qemu memory rather than target memory (so it can later be copied
    into the right spot at machine reset time).
    
    I would use load_image() but it is marked deprecated because it
    doesn't take a buffer size as argument, so let's add load_image_size()
    that does.
    
    Signed-off-by: Benjamin Herrenschmidt <benh at kernel.crashing.org>
    [aik: fixed errors from checkpatch.pl]
    Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/core/loader.c b/hw/core/loader.c
index 193f0f8..597b117 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -89,6 +89,27 @@ int load_image(const char *filename, uint8_t *addr)
     return size;
 }
 
+/* return the size or -1 if error */
+ssize_t load_image_size(const char *filename, void *addr, size_t size)
+{
+    int fd;
+    ssize_t actsize;
+
+    fd = open(filename, O_RDONLY | O_BINARY);
+    if (fd < 0) {
+        return -1;
+    }
+
+    actsize = read(fd, addr, size);
+    if (actsize < 0) {
+        close(fd);
+        return -1;
+    }
+    close(fd);
+
+    return actsize;
+}
+
 /* read()-like version */
 ssize_t read_targphys(const char *name,
                       int fd, hwaddr dst_addr, size_t nbytes)
diff --git a/include/hw/loader.h b/include/hw/loader.h
index 00c9117..9190387 100644
--- a/include/hw/loader.h
+++ b/include/hw/loader.h
@@ -13,6 +13,7 @@
  */
 int get_image_size(const char *filename);
 int load_image(const char *filename, uint8_t *addr); /* deprecated */
+ssize_t load_image_size(const char *filename, void *addr, size_t size);
 int load_image_targphys(const char *filename, hwaddr,
                         uint64_t max_sz);
 int load_image_gzipped(const char *filename, hwaddr addr, uint64_t max_sz);
commit c3b4f589d86ae4a6b9f6c1e0587998bc525833da
Author: Alexey Kardashevskiy <aik at ozlabs.ru>
Date:   Thu Jul 3 13:10:07 2014 +1000

    spapr: Fix ibm, associativity for memory nodes
    
    We want the associtivity lists of memory and CPU nodes to match but
    memory nodes have incorrect domain#3 which is zero for CPU so they won't
    match.
    
    This clears domain#3 in the list to match CPUs associtivity lists.
    
    Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 1623805..12dbf1b 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -653,7 +653,7 @@ static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
     uint32_t associativity[] = {
         cpu_to_be32(0x4), /* length */
         cpu_to_be32(0x0), cpu_to_be32(0x0),
-        cpu_to_be32(nodeid), cpu_to_be32(nodeid)
+        cpu_to_be32(0x0), cpu_to_be32(nodeid)
     };
     char mem_name[32];
     uint64_t mem_reg_property[2];
commit b082d65a30078d176f8d1fbb3b99e1449fa2fcff
Author: Alexey Kardashevskiy <aik at ozlabs.ru>
Date:   Thu Jul 3 13:10:06 2014 +1000

    spapr: Add a helper for node0_size calculation
    
    In multiple places there is a node0_size variable calculation
    which assumes that NUMA node #0 and memory node #0 are the same
    things which they are not. Since we are going to change it and
    do not want to change it in multiple places, let's make a helper.
    
    This adds a spapr_node0_size() helper and makes use of it.
    
    Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index f2fa11e..1623805 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -283,6 +283,19 @@ static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop,
     return (p - prop) * sizeof(uint32_t);
 }
 
+static hwaddr spapr_node0_size(void)
+{
+    if (nb_numa_nodes) {
+        int i;
+        for (i = 0; i < nb_numa_nodes; ++i) {
+            if (numa_info[i].node_mem) {
+                return MIN(pow2floor(numa_info[i].node_mem), ram_size);
+            }
+        }
+    }
+    return ram_size;
+}
+
 #define _FDT(exp) \
     do { \
         int ret = (exp);                                           \
@@ -833,9 +846,8 @@ static void spapr_reset_htab(sPAPREnvironment *spapr)
 
     /* Update the RMA size if necessary */
     if (spapr->vrma_adjust) {
-        hwaddr node0_size = (nb_numa_nodes > 1) ?
-            numa_info[0].node_mem : ram_size;
-        spapr->rma_size = kvmppc_rma_size(node0_size, spapr->htab_shift);
+        spapr->rma_size = kvmppc_rma_size(spapr_node0_size(),
+                                          spapr->htab_shift);
     }
 }
 
@@ -1268,7 +1280,7 @@ static void ppc_spapr_init(MachineState *machine)
     MemoryRegion *rma_region;
     void *rma = NULL;
     hwaddr rma_alloc_size;
-    hwaddr node0_size = (nb_numa_nodes > 1) ? numa_info[0].node_mem : ram_size;
+    hwaddr node0_size = spapr_node0_size();
     uint32_t initrd_base = 0;
     long kernel_size = 0, initrd_size = 0;
     long load_limit, rtas_limit, fw_size;
commit 6010818c30ce9c796b4e22fd261fc6fea1cecbfc
Author: Alexey Kardashevskiy <aik at ozlabs.ru>
Date:   Thu Jul 3 13:10:05 2014 +1000

    spapr: Split memory nodes to power-of-two blocks
    
    Linux kernel expects nodes to have power-of-two size and
    does WARN_ON if this is not the case:
    [    0.041456] WARNING: at drivers/base/memory.c:115
    which is:
    
    ===
    	/* Validate blk_sz is a power of 2 and not less than section size */
    	if ((block_sz & (block_sz - 1)) || (block_sz < MIN_MEMORY_BLOCK_SIZE)) {
            	WARN_ON(1);
    	        block_sz = MIN_MEMORY_BLOCK_SIZE;
    	}
    ===
    
    This splits memory nodes into set of smaller blocks with
    a size which is a power of two. This makes sure the start
    address of every node is aligned to the node size.
    
    Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
    [agraf: squash windows compile fix in]
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 718a201..f2fa11e 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -691,8 +691,18 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
             mem_start += spapr->rma_size;
             node_size -= spapr->rma_size;
         }
-        spapr_populate_memory_node(fdt, i, mem_start, node_size);
-        mem_start += node_size;
+        for ( ; node_size; ) {
+            hwaddr sizetmp = pow2floor(node_size);
+
+            /* mem_start != 0 here */
+            if (ctzl(mem_start) < ctzl(sizetmp)) {
+                sizetmp = 1ULL << ctzl(mem_start);
+            }
+
+            spapr_populate_memory_node(fdt, i, mem_start, sizetmp);
+            node_size -= sizetmp;
+            mem_start += sizetmp;
+        }
     }
 
     return 0;
commit 7db8a127e373e468d1f61e46e01e50d1aa33e827
Author: Alexey Kardashevskiy <aik at ozlabs.ru>
Date:   Thu Jul 3 13:10:04 2014 +1000

    spapr: Refactor spapr_populate_memory() to allow memoryless nodes
    
    Current QEMU does not support memoryless NUMA nodes, however
    actual hardware may have them so it makes sense to have a way
    to emulate them in QEMU. This prepares SPAPR for that.
    
    This moves 2 calls of spapr_populate_memory_node() into
    the existing loop over numa nodes so first several nodes may
    have no memory and this still will work.
    
    If there is no numa configuration, the code assumes there is just
    a single node at 0 and it has all the guest memory.
    
    Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 9b9b6c4..718a201 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -661,36 +661,36 @@ static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
 
 static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
 {
-    hwaddr node0_size, mem_start, node_size;
-    int i;
-
-    /* memory node(s) */
-    if (nb_numa_nodes > 1 && numa_info[0].node_mem < ram_size) {
-        node0_size = numa_info[0].node_mem;
-    } else {
-        node0_size = ram_size;
-    }
+    hwaddr mem_start, node_size;
+    int i, nb_nodes = nb_numa_nodes;
+    NodeInfo *nodes = numa_info;
+    NodeInfo ramnode;
 
-    /* RMA */
-    spapr_populate_memory_node(fdt, 0, 0, spapr->rma_size);
-
-    /* RAM: Node 0 */
-    if (node0_size > spapr->rma_size) {
-        spapr_populate_memory_node(fdt, 0, spapr->rma_size,
-                                   node0_size - spapr->rma_size);
+    /* No NUMA nodes, assume there is just one node with whole RAM */
+    if (!nb_numa_nodes) {
+        nb_nodes = 1;
+        ramnode.node_mem = ram_size;
+        nodes = &ramnode;
     }
 
-    /* RAM: Node 1 and beyond */
-    mem_start = node0_size;
-    for (i = 1; i < nb_numa_nodes; i++) {
+    for (i = 0, mem_start = 0; i < nb_nodes; ++i) {
+        if (!nodes[i].node_mem) {
+            continue;
+        }
         if (mem_start >= ram_size) {
             node_size = 0;
         } else {
-            node_size = numa_info[i].node_mem;
+            node_size = nodes[i].node_mem;
             if (node_size > ram_size - mem_start) {
                 node_size = ram_size - mem_start;
             }
         }
+        if (!mem_start) {
+            /* ppc_spapr_init() checks for rma_size <= node0_size already */
+            spapr_populate_memory_node(fdt, i, 0, spapr->rma_size);
+            mem_start += spapr->rma_size;
+            node_size -= spapr->rma_size;
+        }
         spapr_populate_memory_node(fdt, i, mem_start, node_size);
         mem_start += node_size;
     }
commit 81014ac2b88b5fd275c33b463efe306668e920ed
Author: Alexey Kardashevskiy <aik at ozlabs.ru>
Date:   Thu Jul 3 13:10:03 2014 +1000

    spapr: Use DT memory node rendering helper for other nodes
    
    This finishes refactoring by using the spapr_populate_memory_node helper
    for all nodes and removing leftovers from spapr_populate_memory().
    
    This is not a part of the previous patch because the patches look
    nicer apart.
    
    Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 28c8578..9b9b6c4 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -661,13 +661,8 @@ static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
 
 static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
 {
-    uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
-                                cpu_to_be32(0x0), cpu_to_be32(0x0),
-                                cpu_to_be32(0x0)};
-    char mem_name[32];
     hwaddr node0_size, mem_start, node_size;
-    uint64_t mem_reg_property[2];
-    int i, off;
+    int i;
 
     /* memory node(s) */
     if (nb_numa_nodes > 1 && numa_info[0].node_mem < ram_size) {
@@ -688,7 +683,6 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
     /* RAM: Node 1 and beyond */
     mem_start = node0_size;
     for (i = 1; i < nb_numa_nodes; i++) {
-        mem_reg_property[0] = cpu_to_be64(mem_start);
         if (mem_start >= ram_size) {
             node_size = 0;
         } else {
@@ -697,16 +691,7 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
                 node_size = ram_size - mem_start;
             }
         }
-        mem_reg_property[1] = cpu_to_be64(node_size);
-        associativity[3] = associativity[4] = cpu_to_be32(i);
-        sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start);
-        off = fdt_add_subnode(fdt, 0, mem_name);
-        _FDT(off);
-        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
-        _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
-                          sizeof(mem_reg_property))));
-        _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
-                          sizeof(associativity))));
+        spapr_populate_memory_node(fdt, i, mem_start, node_size);
         mem_start += node_size;
     }
 
commit 26a8c353bf0ffb485f4a68bea97efcef7d2bbaa3
Author: Alexey Kardashevskiy <aik at ozlabs.ru>
Date:   Thu Jul 3 13:10:02 2014 +1000

    spapr: Move DT memory node rendering to a helper
    
    This moves recurring bits of code related to memory at xxx nodes
    creation to a helper.
    
    This makes use of the new helper for node at 0.
    
    Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 522ee27..28c8578 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -634,6 +634,31 @@ int spapr_h_cas_compose_response(target_ulong addr, target_ulong size)
     return 0;
 }
 
+static void spapr_populate_memory_node(void *fdt, int nodeid, hwaddr start,
+                                       hwaddr size)
+{
+    uint32_t associativity[] = {
+        cpu_to_be32(0x4), /* length */
+        cpu_to_be32(0x0), cpu_to_be32(0x0),
+        cpu_to_be32(nodeid), cpu_to_be32(nodeid)
+    };
+    char mem_name[32];
+    uint64_t mem_reg_property[2];
+    int off;
+
+    mem_reg_property[0] = cpu_to_be64(start);
+    mem_reg_property[1] = cpu_to_be64(size);
+
+    sprintf(mem_name, "memory@" TARGET_FMT_lx, start);
+    off = fdt_add_subnode(fdt, 0, mem_name);
+    _FDT(off);
+    _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
+    _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
+                      sizeof(mem_reg_property))));
+    _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
+                      sizeof(associativity))));
+}
+
 static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
 {
     uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
@@ -652,29 +677,12 @@ static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
     }
 
     /* RMA */
-    mem_reg_property[0] = 0;
-    mem_reg_property[1] = cpu_to_be64(spapr->rma_size);
-    off = fdt_add_subnode(fdt, 0, "memory at 0");
-    _FDT(off);
-    _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
-    _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
-                      sizeof(mem_reg_property))));
-    _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
-                      sizeof(associativity))));
+    spapr_populate_memory_node(fdt, 0, 0, spapr->rma_size);
 
     /* RAM: Node 0 */
     if (node0_size > spapr->rma_size) {
-        mem_reg_property[0] = cpu_to_be64(spapr->rma_size);
-        mem_reg_property[1] = cpu_to_be64(node0_size - spapr->rma_size);
-
-        sprintf(mem_name, "memory@" TARGET_FMT_lx, spapr->rma_size);
-        off = fdt_add_subnode(fdt, 0, mem_name);
-        _FDT(off);
-        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
-        _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
-                          sizeof(mem_reg_property))));
-        _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
-                          sizeof(associativity))));
+        spapr_populate_memory_node(fdt, 0, spapr->rma_size,
+                                   node0_size - spapr->rma_size);
     }
 
     /* RAM: Node 1 and beyond */
commit a21a7a701252717f05defee8a1a33d72c28fabb7
Author: Gonglei <arei.gonglei at huawei.com>
Date:   Sat Jul 26 12:45:33 2014 +0800

    spapr: fix possible memory leak
    
    get_boot_devices_list() will malloc memory, spapr_finalize_fdt
    doesn't free it.
    
    Signed-off-by: Chenliang <chenliang88 at huawei.com>
    Signed-off-by: Gonglei <arei.gonglei at huawei.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 0adea31..522ee27 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -783,6 +783,7 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
 
     cpu_physical_memory_write(fdt_addr, fdt, fdt_totalsize(fdt));
 
+    g_free(bootlist);
     g_free(fdt);
 }
 
commit 261265cc912b375649fcdf7aded0f87359dba544
Author: Alexander Graf <agraf at suse.de>
Date:   Fri Jul 11 03:24:39 2014 +0200

    PPC: mac99: Move NVRAM to page boundary when necessary
    
    When running KVM we have to adhere to host page boundaries for memory slots.
    Unfortunately the NVRAM on mac99 is a 4k RAM hole inside of an MMIO flash
    area.
    
    So if our host is configured with 64k page size, we can't use the mac99 target
    with KVM. This is a real shame, as this limitation is not really an issue - we
    can easily map NVRAM somewhere else and at least Linux and Mac OS X use it
    at their new location.
    
    So in that emergency case when it's about failing to run at all and moving NVRAM
    to a place it shouldn't be at, choose the latter.
    
    This patch enables -M mac99 with KVM on 64k page size hosts.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 0693168..26067b4 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -176,6 +176,7 @@ static void ppc_core99_init(MachineState *machine)
     SysBusDevice *s;
     DeviceState *dev;
     int *token = g_new(int, 1);
+    hwaddr nvram_addr = 0xFFF04000;
 
     linux_boot = (kernel_filename != NULL);
 
@@ -426,11 +427,18 @@ static void ppc_core99_init(MachineState *machine)
     }
 
     /* The NewWorld NVRAM is not located in the MacIO device */
+#ifdef CONFIG_KVM
+    if (kvm_enabled() && getpagesize() > 4096) {
+        /* We can't combine read-write and read-only in a single page, so
+           move the NVRAM out of ROM again for KVM */
+        nvram_addr = 0xFFE00000;
+    }
+#endif
     dev = qdev_create(NULL, TYPE_MACIO_NVRAM);
     qdev_prop_set_uint32(dev, "size", 0x2000);
     qdev_prop_set_uint32(dev, "it_shift", 1);
     qdev_init_nofail(dev);
-    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, 0xFFF04000);
+    sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, nvram_addr);
     nvr = MACIO_NVRAM(dev);
     pmac_format_nvram_partition(nvr, 0x2000);
     /* No PCI init: the BIOS will do it */
@@ -473,6 +481,7 @@ static void ppc_core99_init(MachineState *machine)
     /* Mac OS X requires a "known good" clock-frequency value; pass it one. */
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_CLOCKFREQ, CLOCKFREQ);
     fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_BUSFREQ, BUSFREQ);
+    fw_cfg_add_i32(fw_cfg, FW_CFG_PPC_NVRAM_ADDR, nvram_addr);
 
     qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);
 }
diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h
index 7e16e2e..14efd0c 100644
--- a/include/hw/ppc/ppc.h
+++ b/include/hw/ppc/ppc.h
@@ -92,7 +92,7 @@ enum {
 #define FW_CFG_PPC_IS_KVM       (FW_CFG_ARCH_LOCAL + 0x05)
 #define FW_CFG_PPC_KVM_HC       (FW_CFG_ARCH_LOCAL + 0x06)
 #define FW_CFG_PPC_KVM_PID      (FW_CFG_ARCH_LOCAL + 0x07)
-/* OpenBIOS has FW_CFG_PPC_NVRAM_ADDR as +0x08 */
+#define FW_CFG_PPC_NVRAM_ADDR   (FW_CFG_ARCH_LOCAL + 0x08)
 #define FW_CFG_PPC_BUSFREQ      (FW_CFG_ARCH_LOCAL + 0x09)
 
 #define PPC_SERIAL_MM_BAUDBASE 399193
commit ef9514431d33e52eb611f799670ca86618c1b7d9
Author: Nikunj A Dadhania <nikunj at linux.vnet.ibm.com>
Date:   Wed Jul 9 16:08:37 2014 +0530

    spapr: add uuid/host details to device tree
    
    Useful for identifying the guest/host uniquely within the
    guest. Adding following properties to the guest root node.
    
    vm,uuid - uuid of the guest
    host-model - Host model number
    host-serial - Host machine serial number
    hypervisor type - Tells its "kvm"
    
    Signed-off-by: Nikunj A Dadhania <nikunj at linux.vnet.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 6bb646c..0adea31 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -319,6 +319,7 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
     QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL);
     unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0;
     uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1;
+    char *buf;
 
     add_str(hypertas, "hcall-pft");
     add_str(hypertas, "hcall-term");
@@ -348,6 +349,33 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
     _FDT((fdt_property_string(fdt, "model", "IBM pSeries (emulated by qemu)")));
     _FDT((fdt_property_string(fdt, "compatible", "qemu,pseries")));
 
+    if (kvm_enabled()) {
+        _FDT((fdt_property_string(fdt, "hypervisor", "kvm")));
+    }
+
+    /*
+     * Add info to guest to indentify which host is it being run on
+     * and what is the uuid of the guest
+     */
+    if (kvmppc_get_host_model(&buf)) {
+        _FDT((fdt_property_string(fdt, "host-model", buf)));
+        g_free(buf);
+    }
+    if (kvmppc_get_host_serial(&buf)) {
+        _FDT((fdt_property_string(fdt, "host-serial", buf)));
+        g_free(buf);
+    }
+
+    buf = g_strdup_printf(UUID_FMT, qemu_uuid[0], qemu_uuid[1],
+                          qemu_uuid[2], qemu_uuid[3], qemu_uuid[4],
+                          qemu_uuid[5], qemu_uuid[6], qemu_uuid[7],
+                          qemu_uuid[8], qemu_uuid[9], qemu_uuid[10],
+                          qemu_uuid[11], qemu_uuid[12], qemu_uuid[13],
+                          qemu_uuid[14], qemu_uuid[15]);
+
+    _FDT((fdt_property_string(fdt, "vm,uuid", buf)));
+    g_free(buf);
+
     _FDT((fdt_property_cell(fdt, "#address-cells", 0x2)));
     _FDT((fdt_property_cell(fdt, "#size-cells", 0x2)));
 
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 42718f7..8c9e79c 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -1369,7 +1369,7 @@ static int read_cpuinfo(const char *field, char *value, int len)
     }
 
     do {
-        if(!fgets(line, sizeof(line), f)) {
+        if (!fgets(line, sizeof(line), f)) {
             break;
         }
         if (!strncmp(line, field, field_len)) {
@@ -1404,6 +1404,17 @@ uint32_t kvmppc_get_tbfreq(void)
     return retval;
 }
 
+bool kvmppc_get_host_serial(char **value)
+{
+    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
+                               NULL);
+}
+
+bool kvmppc_get_host_model(char **value)
+{
+    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
+}
+
 /* Try to find a device tree node for a CPU with clock-frequency property */
 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
 {
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index d9516e7..2e0224c 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -19,6 +19,8 @@ uint32_t kvmppc_get_tbfreq(void);
 uint64_t kvmppc_get_clockfreq(void);
 uint32_t kvmppc_get_vmx(void);
 uint32_t kvmppc_get_dfp(void);
+bool kvmppc_get_host_model(char **buf);
+bool kvmppc_get_host_serial(char **buf);
 int kvmppc_get_hasidle(CPUPPCState *env);
 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len);
 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level);
@@ -60,6 +62,16 @@ static inline uint32_t kvmppc_get_tbfreq(void)
     return 0;
 }
 
+static inline bool kvmppc_get_host_model(char **buf)
+{
+    return false;
+}
+
+static inline bool kvmppc_get_host_serial(char **buf)
+{
+    return false;
+}
+
 static inline uint64_t kvmppc_get_clockfreq(void)
 {
     return 0;
commit 7d0cd464a756f3d47f308d7c47eb888b573a9fe4
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Tue Jul 8 16:02:26 2014 +0100

    hw/ppc/spapr_hcall.c: Fix typo in function names
    
    Fix a typo in the names of a couple of functions
    (s/resouce/resource/).
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 467858c..8651447 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -712,10 +712,10 @@ static target_ulong h_logical_dcbf(PowerPCCPU *cpu, sPAPREnvironment *spapr,
     return H_SUCCESS;
 }
 
-static target_ulong h_set_mode_resouce_le(PowerPCCPU *cpu,
-                                          target_ulong mflags,
-                                          target_ulong value1,
-                                          target_ulong value2)
+static target_ulong h_set_mode_resource_le(PowerPCCPU *cpu,
+                                           target_ulong mflags,
+                                           target_ulong value1,
+                                           target_ulong value2)
 {
     CPUState *cs;
 
@@ -743,10 +743,10 @@ static target_ulong h_set_mode_resouce_le(PowerPCCPU *cpu,
     return H_UNSUPPORTED_FLAG;
 }
 
-static target_ulong h_set_mode_resouce_addr_trans_mode(PowerPCCPU *cpu,
-                                                       target_ulong mflags,
-                                                       target_ulong value1,
-                                                       target_ulong value2)
+static target_ulong h_set_mode_resource_addr_trans_mode(PowerPCCPU *cpu,
+                                                        target_ulong mflags,
+                                                        target_ulong value1,
+                                                        target_ulong value2)
 {
     CPUState *cs;
     PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
@@ -794,11 +794,11 @@ static target_ulong h_set_mode(PowerPCCPU *cpu, sPAPREnvironment *spapr,
 
     switch (resource) {
     case H_SET_MODE_RESOURCE_LE:
-        ret = h_set_mode_resouce_le(cpu, args[0], args[2], args[3]);
+        ret = h_set_mode_resource_le(cpu, args[0], args[2], args[3]);
         break;
     case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
-        ret = h_set_mode_resouce_addr_trans_mode(cpu, args[0],
-                                                 args[2], args[3]);
+        ret = h_set_mode_resource_addr_trans_mode(cpu, args[0],
+                                                  args[2], args[3]);
         break;
     }
 
commit 145855801a002aaf1310630df41425a6bc39cf47
Author: Tom Musta <tommusta at gmail.com>
Date:   Mon Jun 30 08:13:42 2014 -0500

    linux-user: Handle PPC64 ELFv2 Function Pointers
    
    Function pointers in the 64-bit ELFv2 PowerPC ABI are actual (internal)
    entry point addresses.  However, when invoking a function via a function
    pointer, GPR 12 must also be set to this address so that the TOC may be
    handled properly.
    
    Add this support to the invocation of a signal handler.
    
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 281d1c4..e11b208 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -4681,6 +4681,9 @@ static void setup_frame(int sig, struct target_sigaction *ka,
     target_ulong frame_addr, newsp;
     int err = 0;
     int signal;
+#if defined(TARGET_PPC64)
+    struct image_info *image = ((TaskState *)thread_cpu->opaque)->info;
+#endif
 
     frame_addr = get_sigframe(ka, env, sizeof(*frame));
     if (!lock_user_struct(VERIFY_WRITE, frame, frame_addr, 1))
@@ -4725,11 +4728,18 @@ static void setup_frame(int sig, struct target_sigaction *ka,
     env->gpr[4] = frame_addr + offsetof(struct target_sigframe, sctx);
 
 #if defined(TARGET_PPC64)
-    /* PPC64 function pointers are pointers to OPD entries. */
-    struct target_func_ptr *handler =
-        (struct target_func_ptr *)g2h(ka->_sa_handler);
-    env->nip = tswapl(handler->entry);
-    env->gpr[2] = tswapl(handler->toc);
+    if (get_ppc64_abi(image) < 2) {
+        /* ELFv1 PPC64 function pointers are pointers to OPD entries. */
+        struct target_func_ptr *handler =
+            (struct target_func_ptr *)g2h(ka->_sa_handler);
+        env->nip = tswapl(handler->entry);
+        env->gpr[2] = tswapl(handler->toc);
+    } else {
+        /* ELFv2 PPC64 function pointers are entry points, but R12
+         * must also be set */
+        env->nip = tswapl((target_ulong) ka->_sa_handler);
+        env->gpr[12] = env->nip;
+    }
 #else
     env->nip = (target_ulong) ka->_sa_handler;
 #endif
@@ -4756,6 +4766,9 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
     target_ulong rt_sf_addr, newsp = 0;
     int i, err = 0;
     int signal;
+#if defined(TARGET_PPC64)
+    struct image_info *image = ((TaskState *)thread_cpu->opaque)->info;
+#endif
 
     rt_sf_addr = get_sigframe(ka, env, sizeof(*rt_sf));
     if (!lock_user_struct(VERIFY_WRITE, rt_sf, rt_sf_addr, 1))
@@ -4814,11 +4827,18 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
     env->gpr[6] = (target_ulong) h2g(rt_sf);
 
 #if defined(TARGET_PPC64)
-    /* PPC64 function pointers are pointers to OPD entries.  */
-    struct target_func_ptr *handler =
-        (struct target_func_ptr *)g2h(ka->_sa_handler);
-    env->nip = tswapl(handler->entry);
-    env->gpr[2] = tswapl(handler->toc);
+    if (get_ppc64_abi(image) < 2) {
+        /* ELFv1 PPC64 function pointers are pointers to OPD entries. */
+        struct target_func_ptr *handler =
+            (struct target_func_ptr *)g2h(ka->_sa_handler);
+        env->nip = tswapl(handler->entry);
+        env->gpr[2] = tswapl(handler->toc);
+    } else {
+        /* ELFv2 PPC64 function pointers are entry points, but R12
+         * must also be set */
+        env->nip = tswapl((target_ulong) ka->_sa_handler);
+        env->gpr[12] = env->nip;
+    }
 #else
     env->nip = (target_ulong) ka->_sa_handler;
 #endif
commit 19774ec5c4019d1802b497d702e1eefd3c193e84
Author: Tom Musta <tommusta at gmail.com>
Date:   Mon Jun 30 08:13:40 2014 -0500

    linux-user: Implement do_setcontext for PPC64
    
    Eliminate the stub for the do_setcontext() function for TARGET_PPC64.  The
    implementation re-uses the existing TARGET_PPC32 code with the only change
    being the computation of the address of the register save area.
    
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/linux-user/signal.c b/linux-user/signal.c
index e8e49db..281d1c4 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -4877,10 +4877,6 @@ sigsegv:
 /* See arch/powerpc/kernel/signal_32.c.  */
 static int do_setcontext(struct target_ucontext *ucp, CPUPPCState *env, int sig)
 {
-#if defined(TARGET_PPC64)
-    fprintf(stderr, "do_setcontext: not implemented\n");
-    return 0;
-#else
     struct target_mcontext *mcp;
     target_ulong mcp_addr;
     sigset_t blocked;
@@ -4890,7 +4886,12 @@ static int do_setcontext(struct target_ucontext *ucp, CPUPPCState *env, int sig)
                        sizeof (set)))
         return 1;
 
+#if defined(TARGET_PPC64)
+    mcp_addr = h2g(ucp) +
+        offsetof(struct target_ucontext, tuc_sigcontext.mcontext);
+#else
     __get_user(mcp_addr, &ucp->tuc_regs);
+#endif
 
     if (!lock_user_struct(VERIFY_READ, mcp, mcp_addr, 1))
         return 1;
@@ -4901,7 +4902,6 @@ static int do_setcontext(struct target_ucontext *ucp, CPUPPCState *env, int sig)
 
     unlock_user_struct(mcp, mcp_addr, 1);
     return 0;
-#endif
 }
 
 long do_rt_sigreturn(CPUPPCState *env)
commit 8d6ab333ebf2751d2467bd7dae94a04742052e26
Author: Tom Musta <tommusta at gmail.com>
Date:   Mon Jun 30 08:13:39 2014 -0500

    linux-user: Properly Dereference PPC64 ELFv1 Signal Handler Pointer
    
    Properly dereference 64-bit PPC ELF V1 ABIT function pointers to signal handlers.
    On this platform, function pointers are pointers to structures and the first 64
    bits of such a structure contains the function's entry point.  The second 64 bits
    contains the TOC pointer, which must be placed into GPR 2.
    
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 56c38db..e8e49db 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -4483,6 +4483,15 @@ struct target_rt_sigframe {
 
 #endif
 
+#if defined(TARGET_PPC64)
+
+struct target_func_ptr {
+    target_ulong entry;
+    target_ulong toc;
+};
+
+#endif
+
 /* We use the mc_pad field for the signal return trampoline.  */
 #define tramp mc_pad
 
@@ -4714,7 +4723,17 @@ static void setup_frame(int sig, struct target_sigaction *ka,
     env->gpr[1] = newsp;
     env->gpr[3] = signal;
     env->gpr[4] = frame_addr + offsetof(struct target_sigframe, sctx);
+
+#if defined(TARGET_PPC64)
+    /* PPC64 function pointers are pointers to OPD entries. */
+    struct target_func_ptr *handler =
+        (struct target_func_ptr *)g2h(ka->_sa_handler);
+    env->nip = tswapl(handler->entry);
+    env->gpr[2] = tswapl(handler->toc);
+#else
     env->nip = (target_ulong) ka->_sa_handler;
+#endif
+
     /* Signal handlers are entered in big-endian mode.  */
     env->msr &= ~MSR_LE;
 
@@ -4793,7 +4812,17 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
     env->gpr[4] = (target_ulong) h2g(&rt_sf->info);
     env->gpr[5] = (target_ulong) h2g(&rt_sf->uc);
     env->gpr[6] = (target_ulong) h2g(rt_sf);
+
+#if defined(TARGET_PPC64)
+    /* PPC64 function pointers are pointers to OPD entries.  */
+    struct target_func_ptr *handler =
+        (struct target_func_ptr *)g2h(ka->_sa_handler);
+    env->nip = tswapl(handler->entry);
+    env->gpr[2] = tswapl(handler->toc);
+#else
     env->nip = (target_ulong) ka->_sa_handler;
+#endif
+
     /* Signal handlers are entered in big-endian mode.  */
     env->msr &= ~MSR_LE;
 
commit 61e75fecef17aff4b0ecef26b300b90de410aaa2
Author: Tom Musta <tommusta at gmail.com>
Date:   Mon Jun 30 08:13:38 2014 -0500

    linux-user: Enable Signal Handlers on PPC64
    
    Enable the 64-bit PowerPC signal handling code that was previously
    disabled via #ifdefs.  Specifically:
    
      - Move the target_mcontext (register save area) structure and
        append it to the 64-bit target_sigcontext structure.  This
        provides the space on the stack for saving and restoring
        context.
      - Define the target_rt_sigframe for 64-bit.
      - Adjust the setup_frame and setup_rt_frame routines to properly
        select the target_mcontext area and trampoline within the stack
        frame; tthis is different for 32-bit and 64-bit implementations.
      - Adjust the do_setcontext stub for 64-bit so that it compiles
        without warnings.
    
    The 64-bit signal handling code is still not functional after this
    change; but the 32-bit code is.  Subsequent changes will address
    specific issues with the 64-bit code.
    
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    [agraf: fix build on 32bit hosts, ppc64abi32]
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 7365d5d..56c38db 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -4325,15 +4325,7 @@ badframe:
     return 0;
 }
 
-#elif defined(TARGET_PPC) && !defined(TARGET_PPC64)
-
-/* FIXME: Many of the structures are defined for both PPC and PPC64, but
-   the signal handling is different enough that we haven't implemented
-   support for PPC64 yet.  Hence the restriction above.
-
-   There are various #if'd blocks for code for TARGET_PPC64.  These
-   blocks should go away so that we can successfully run 32-bit and
-   64-bit binaries on a QEMU configured for PPC64.  */
+#elif defined(TARGET_PPC)
 
 /* Size of dummy stack frame allocated when calling signal handler.
    See arch/powerpc/include/asm/ptrace.h.  */
@@ -4343,6 +4335,33 @@ badframe:
 #define SIGNAL_FRAMESIZE 64
 #endif
 
+/* See arch/powerpc/include/asm/ucontext.h.  Only used for 32-bit PPC;
+   on 64-bit PPC, sigcontext and mcontext are one and the same.  */
+struct target_mcontext {
+    target_ulong mc_gregs[48];
+    /* Includes fpscr.  */
+    uint64_t mc_fregs[33];
+    target_ulong mc_pad[2];
+    /* We need to handle Altivec and SPE at the same time, which no
+       kernel needs to do.  Fortunately, the kernel defines this bit to
+       be Altivec-register-large all the time, rather than trying to
+       twiddle it based on the specific platform.  */
+    union {
+        /* SPE vector registers.  One extra for SPEFSCR.  */
+        uint32_t spe[33];
+        /* Altivec vector registers.  The packing of VSCR and VRSAVE
+           varies depending on whether we're PPC64 or not: PPC64 splits
+           them apart; PPC32 stuffs them together.  */
+#if defined(TARGET_PPC64)
+#define QEMU_NVRREG 34
+#else
+#define QEMU_NVRREG 33
+#endif
+        ppc_avr_t altivec[QEMU_NVRREG];
+#undef QEMU_NVRREG
+    } mc_vregs __attribute__((__aligned__(16)));
+};
+
 /* See arch/powerpc/include/asm/sigcontext.h.  */
 struct target_sigcontext {
     target_ulong _unused[4];
@@ -4353,7 +4372,9 @@ struct target_sigcontext {
     target_ulong handler;
     target_ulong oldmask;
     target_ulong regs;      /* struct pt_regs __user * */
-    /* TODO: PPC64 includes extra bits here.  */
+#if defined(TARGET_PPC64)
+    struct target_mcontext mcontext;
+#endif
 };
 
 /* Indices for target_mcontext.mc_gregs, below.
@@ -4408,32 +4429,6 @@ enum {
     TARGET_PT_REGS_COUNT = 44
 };
 
-/* See arch/powerpc/include/asm/ucontext.h.  Only used for 32-bit PPC;
-   on 64-bit PPC, sigcontext and mcontext are one and the same.  */
-struct target_mcontext {
-    target_ulong mc_gregs[48];
-    /* Includes fpscr.  */
-    uint64_t mc_fregs[33];
-    target_ulong mc_pad[2];
-    /* We need to handle Altivec and SPE at the same time, which no
-       kernel needs to do.  Fortunately, the kernel defines this bit to
-       be Altivec-register-large all the time, rather than trying to
-       twiddle it based on the specific platform.  */
-    union {
-        /* SPE vector registers.  One extra for SPEFSCR.  */
-        uint32_t spe[33];
-        /* Altivec vector registers.  The packing of VSCR and VRSAVE
-           varies depending on whether we're PPC64 or not: PPC64 splits
-           them apart; PPC32 stuffs them together.  */
-#if defined(TARGET_PPC64)
-#define QEMU_NVRREG 34
-#else
-#define QEMU_NVRREG 33
-#endif
-        ppc_avr_t altivec[QEMU_NVRREG];
-#undef QEMU_NVRREG
-    } mc_vregs __attribute__((__aligned__(16)));
-};
 
 struct target_ucontext {
     target_ulong tuc_flags;
@@ -4447,7 +4442,7 @@ struct target_ucontext {
     target_sigset_t tuc_sigmask;
 #if defined(TARGET_PPC64)
     target_sigset_t unused[15]; /* Allow for uc_sigmask growth */
-    struct target_sigcontext tuc_mcontext;
+    struct target_sigcontext tuc_sigcontext;
 #else
     int32_t tuc_maskext[30];
     int32_t tuc_pad2[3];
@@ -4462,12 +4457,32 @@ struct target_sigframe {
     int32_t abigap[56];
 };
 
+#if defined(TARGET_PPC64)
+
+#define TARGET_TRAMP_SIZE 6
+
+struct target_rt_sigframe {
+        /* sys_rt_sigreturn requires the ucontext be the first field */
+        struct target_ucontext uc;
+        target_ulong  _unused[2];
+        uint32_t trampoline[TARGET_TRAMP_SIZE];
+        target_ulong pinfo; /* struct siginfo __user * */
+        target_ulong puc; /* void __user * */
+        struct target_siginfo info;
+        /* 64 bit ABI allows for 288 bytes below sp before decrementing it. */
+        char abigap[288];
+} __attribute__((aligned(16)));
+
+#else
+
 struct target_rt_sigframe {
     struct target_siginfo info;
     struct target_ucontext uc;
     int32_t abigap[56];
 };
 
+#endif
+
 /* We use the mc_pad field for the signal return trampoline.  */
 #define tramp mc_pad
 
@@ -4667,7 +4682,7 @@ static void setup_frame(int sig, struct target_sigaction *ka,
 
     __put_user(ka->_sa_handler, &sc->handler);
     __put_user(set->sig[0], &sc->oldmask);
-#if defined(TARGET_PPC64)
+#if TARGET_ABI_BITS == 64
     __put_user(set->sig[0] >> 32, &sc->_unused[3]);
 #else
     __put_user(set->sig[1], &sc->_unused[3]);
@@ -4717,7 +4732,8 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
                            target_sigset_t *set, CPUPPCState *env)
 {
     struct target_rt_sigframe *rt_sf;
-    struct target_mcontext *frame;
+    uint32_t *trampptr = 0;
+    struct target_mcontext *mctx = 0;
     target_ulong rt_sf_addr, newsp = 0;
     int i, err = 0;
     int signal;
@@ -4738,19 +4754,28 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
                &rt_sf->uc.tuc_stack.ss_flags);
     __put_user(target_sigaltstack_used.ss_size,
                &rt_sf->uc.tuc_stack.ss_size);
+#if !defined(TARGET_PPC64)
     __put_user(h2g (&rt_sf->uc.tuc_mcontext),
                &rt_sf->uc.tuc_regs);
+#endif
     for(i = 0; i < TARGET_NSIG_WORDS; i++) {
         __put_user(set->sig[i], &rt_sf->uc.tuc_sigmask.sig[i]);
     }
 
-    frame = &rt_sf->uc.tuc_mcontext;
-    save_user_regs(env, frame);
-    encode_trampoline(TARGET_NR_rt_sigreturn, (uint32_t *)&frame->tramp);
+#if defined(TARGET_PPC64)
+    mctx = &rt_sf->uc.tuc_sigcontext.mcontext;
+    trampptr = &rt_sf->trampoline[0];
+#else
+    mctx = &rt_sf->uc.tuc_mcontext;
+    trampptr = (uint32_t *)&rt_sf->uc.tuc_mcontext.tramp;
+#endif
+
+    save_user_regs(env, mctx);
+    encode_trampoline(TARGET_NR_rt_sigreturn, trampptr);
 
     /* The kernel checks for the presence of a VDSO here.  We don't
        emulate a vdso, so use a sigreturn system call.  */
-    env->lr = (target_ulong) h2g(frame->tramp);
+    env->lr = (target_ulong) h2g(trampptr);
 
     /* Turn off all fp exceptions.  */
     env->fpscr = 0;
@@ -4795,7 +4820,7 @@ long do_sigreturn(CPUPPCState *env)
         goto sigsegv;
 
 #if defined(TARGET_PPC64)
-    set.sig[0] = sc->oldmask + ((long)(sc->_unused[3]) << 32);
+    set.sig[0] = sc->oldmask + ((uint64_t)(sc->_unused[3]) << 32);
 #else
     __get_user(set.sig[0], &sc->oldmask);
     __get_user(set.sig[1], &sc->_unused[3]);
@@ -4823,6 +4848,10 @@ sigsegv:
 /* See arch/powerpc/kernel/signal_32.c.  */
 static int do_setcontext(struct target_ucontext *ucp, CPUPPCState *env, int sig)
 {
+#if defined(TARGET_PPC64)
+    fprintf(stderr, "do_setcontext: not implemented\n");
+    return 0;
+#else
     struct target_mcontext *mcp;
     target_ulong mcp_addr;
     sigset_t blocked;
@@ -4832,10 +4861,6 @@ static int do_setcontext(struct target_ucontext *ucp, CPUPPCState *env, int sig)
                        sizeof (set)))
         return 1;
 
-#if defined(TARGET_PPC64)
-    fprintf (stderr, "do_setcontext: not implemented\n");
-    return 0;
-#else
     __get_user(mcp_addr, &ucp->tuc_regs);
 
     if (!lock_user_struct(VERIFY_READ, mcp, mcp_addr, 1))
commit 7678108b134fc14d0a94fd13c0dd1129525c12d7
Author: Tom Musta <tommusta at gmail.com>
Date:   Mon Jun 30 08:13:37 2014 -0500

    linux-user: Split PPC Trampoline Encoding from Register Save
    
    Split the encoding of the PowerPC sigreturn trampoline from the saving of
    register state onto the signal handler stack.  This will make it easier
    in subsequent patches to deal with variations in the stack frame layouts between
    32 and 64 bit PowerPC.
    
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 2952956..7365d5d 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -4491,8 +4491,7 @@ static target_ulong get_sigframe(struct target_sigaction *ka,
     return newsp;
 }
 
-static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame,
-                          int sigret)
+static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame)
 {
     target_ulong msr = env->msr;
     int i;
@@ -4559,11 +4558,14 @@ static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame,
 
     /* Store MSR.  */
     __put_user(msr, &frame->mc_gregs[TARGET_PT_MSR]);
+}
 
+static void encode_trampoline(int sigret, uint32_t *tramp)
+{
     /* Set up the sigreturn trampoline: li r0,sigret; sc.  */
     if (sigret) {
-        __put_user(0x38000000UL | sigret, &frame->tramp[0]);
-        __put_user(0x44000002UL, &frame->tramp[1]);
+        __put_user(0x38000000 | sigret, &tramp[0]);
+        __put_user(0x44000002, &tramp[1]);
     }
 }
 
@@ -4674,7 +4676,10 @@ static void setup_frame(int sig, struct target_sigaction *ka,
     __put_user(sig, &sc->signal);
 
     /* Save user regs.  */
-    save_user_regs(env, &frame->mctx, TARGET_NR_sigreturn);
+    save_user_regs(env, &frame->mctx);
+
+    /* Construct the trampoline code on the stack. */
+    encode_trampoline(TARGET_NR_sigreturn, (uint32_t *)&frame->mctx.tramp);
 
     /* The kernel checks for the presence of a VDSO here.  We don't
        emulate a vdso, so use a sigreturn system call.  */
@@ -4740,7 +4745,8 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
     }
 
     frame = &rt_sf->uc.tuc_mcontext;
-    save_user_regs(env, frame, TARGET_NR_rt_sigreturn);
+    save_user_regs(env, frame);
+    encode_trampoline(TARGET_NR_rt_sigreturn, (uint32_t *)&frame->tramp);
 
     /* The kernel checks for the presence of a VDSO here.  We don't
        emulate a vdso, so use a sigreturn system call.  */
commit fbdc200ac2d0d29e88ee6af9b77810c0f84265a6
Author: Tom Musta <tommusta at gmail.com>
Date:   Mon Jun 30 08:13:36 2014 -0500

    linux-user: Fix Stack Pointer Bug in PPC setup_rt_frame
    
    The code that sets the stack frame back pointer is incorrect for
    the setup_rt_frame() code; qemu will abort (SIGSEGV) in some
    environments.  The setup_frame code  was fixed in commit
    beb526b12134a6b6744125deec5a7fe24a8f92e3 but the setup_rt_frame
    code was not.
    
    Make the setup_rt_frame code consistent with the setup_frame
    code.
    
    Signed-off-by: Tom Musta <tommusta at gmail.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/linux-user/signal.c b/linux-user/signal.c
index 26929c5..2952956 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -4751,7 +4751,7 @@ static void setup_rt_frame(int sig, struct target_sigaction *ka,
 
     /* Create a stack frame for the caller of the handler.  */
     newsp = rt_sf_addr - (SIGNAL_FRAMESIZE + 16);
-    __put_user(env->gpr[1], (target_ulong *)(uintptr_t) newsp);
+    err |= put_user(env->gpr[1], newsp, target_ulong);
 
     if (err)
         goto sigsegv;
commit 2e14072f9e859272c7b94b8e189bd30bb4954aa1
Author: Nikunj A Dadhania <nikunj at linux.vnet.ibm.com>
Date:   Mon Jun 30 14:05:29 2014 +0530

    ppc: spapr-rtas - implement os-term rtas call
    
    PAPR compliant guest calls this in absence of kdump. This finally
    reaches the guest and can be handled according to the policies set by
    higher level tools(like taking dump) for further analysis by tools like
    crash.
    
    Linux kernel calls ibm,os-term when extended property of os-term is set.
    This makes sure that a return to the linux kernel is gauranteed.
    
    Signed-off-by: Nikunj A Dadhania <nikunj at linux.vnet.ibm.com>
    [agraf: reduce RTAS_TOKEN_MAX]
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 5cb452f..6bb646c 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -502,6 +502,15 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base,
 
     _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));
 
+    /*
+     * According to PAPR, rtas ibm,os-term, does not gaurantee a return
+     * back to the guest cpu.
+     *
+     * While an additional ibm,extended-os-term property indicates that
+     * rtas call return will always occur. Set this property.
+     */
+    _FDT((fdt_property(fdt, "ibm,extended-os-term", NULL, 0)));
+
     _FDT((fdt_end_node(fdt)));
 
     /* interrupt controller */
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index 9ba1ba6..2ec2a8e 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -277,6 +277,19 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU *cpu,
     rtas_st(rets, 0, ret);
 }
 
+static void rtas_ibm_os_term(PowerPCCPU *cpu,
+                            sPAPREnvironment *spapr,
+                            uint32_t token, uint32_t nargs,
+                            target_ulong args,
+                            uint32_t nret, target_ulong rets)
+{
+    target_ulong ret = 0;
+
+    qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, &error_abort);
+
+    rtas_st(rets, 0, ret);
+}
+
 static struct rtas_call {
     const char *name;
     spapr_rtas_fn fn;
@@ -404,6 +417,8 @@ static void core_rtas_register_types(void)
     spapr_rtas_register(RTAS_IBM_SET_SYSTEM_PARAMETER,
                         "ibm,set-system-parameter",
                         rtas_ibm_set_system_parameter);
+    spapr_rtas_register(RTAS_IBM_OS_TERM, "ibm,os-term",
+                        rtas_ibm_os_term);
 }
 
 type_init(core_rtas_register_types)
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index bbba51a..7fff979 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -382,9 +382,8 @@ int spapr_allocate_irq_block(int num, bool lsi, bool msi);
 #define RTAS_GET_SENSOR_STATE                   (RTAS_TOKEN_BASE + 0x1D)
 #define RTAS_IBM_CONFIGURE_CONNECTOR            (RTAS_TOKEN_BASE + 0x1E)
 #define RTAS_IBM_OS_TERM                        (RTAS_TOKEN_BASE + 0x1F)
-#define RTAS_IBM_EXTENDED_OS_TERM               (RTAS_TOKEN_BASE + 0x20)
 
-#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x21)
+#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x20)
 
 /* RTAS ibm,get-system-parameter token values */
 #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
commit 277c7a4d717aedbcb253ca152ae4da67e4162470
Author: Alexander Graf <agraf at suse.de>
Date:   Thu Jul 24 10:46:47 2014 +0200

    PPC: KVM: Fix g3beige and mac99 when HV is loaded
    
    On PPC we have 2 different styles of KVM: PR and HV. HV can only virtualize
    sPAPR guests while PR can virtualize everything that's reasonably close to
    the host hardware platform.
    
    As long as only one kernel module (PR or HV) is loaded, the "default" kvm type
    is the module that's loaded. So if your hardware only supports PR mode you can
    easily spawn a Mac VM.
    
    However, if both HV and PR are loaded we default to HV mode. And in that case
    the Mac machines have to explicitly ask for PR mode to get a working VM.
    
    Fix this up by explicitly having the Mac machines ask for PR style KVM. This
    fixes bootup of Mac VMs on systems where bot HV and PR kvm modules are loaded
    for me.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index 1ec4bb4..0693168 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -477,12 +477,19 @@ static void ppc_core99_init(MachineState *machine)
     qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);
 }
 
+static int core99_kvm_type(const char *arg)
+{
+    /* Always force PR KVM */
+    return 2;
+}
+
 static QEMUMachine core99_machine = {
     .name = "mac99",
     .desc = "Mac99 based PowerMAC",
     .init = ppc_core99_init,
     .max_cpus = MAX_CPUS,
     .default_boot_order = "cd",
+    .kvm_type = core99_kvm_type,
 };
 
 static void core99_machine_init(void)
diff --git a/hw/ppc/mac_oldworld.c b/hw/ppc/mac_oldworld.c
index cd9bdbc..ec7ed38 100644
--- a/hw/ppc/mac_oldworld.c
+++ b/hw/ppc/mac_oldworld.c
@@ -346,6 +346,12 @@ static void ppc_heathrow_init(MachineState *machine)
     qemu_register_boot_set(fw_cfg_boot_set, fw_cfg);
 }
 
+static int heathrow_kvm_type(const char *arg)
+{
+    /* Always force PR KVM */
+    return 2;
+}
+
 static QEMUMachine heathrow_machine = {
     .name = "g3beige",
     .desc = "Heathrow based PowerMAC",
@@ -355,6 +361,7 @@ static QEMUMachine heathrow_machine = {
     .is_default = 1,
 #endif
     .default_boot_order = "cd", /* TOFIX "cad" when Mac floppy is implemented */
+    .kvm_type = heathrow_kvm_type,
 };
 
 static void heathrow_machine_init(void)
commit 01ce352e62c3f86df6f4ad32c3ab9353e55af799
Author: John Snow <jsnow at redhat.com>
Date:   Thu Sep 4 23:42:17 2014 -0400

    ide: Add resize callback to ide/core
    
    Currently, if the block device backing the IDE drive is resized,
    the information about the device as cached inside of the IDEState
    structure is not updated, thus when a guest OS re-queries the drive,
    it is unable to see the expanded size.
    
    This patch adds a resize callback that updates the IDENTIFY data
    buffer in order to correct this.
    
    Lastly, a Linux guest as-is cannot resize a libata drive while in-use,
    but it can see the expanded size as part of a bus rescan event.
    This patch also allows guests such as Linux to see the new drive size
    after a soft reboot event, without having to exit the QEMU process.
    
    Signed-off-by: John Snow <jsnow at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/ide/core.c b/hw/ide/core.c
index e0232d4..191f893 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -75,6 +75,17 @@ static void put_le16(uint16_t *p, unsigned int v)
     *p = cpu_to_le16(v);
 }
 
+static void ide_identify_size(IDEState *s)
+{
+    uint16_t *p = (uint16_t *)s->identify_data;
+    put_le16(p + 60, s->nb_sectors);
+    put_le16(p + 61, s->nb_sectors >> 16);
+    put_le16(p + 100, s->nb_sectors);
+    put_le16(p + 101, s->nb_sectors >> 16);
+    put_le16(p + 102, s->nb_sectors >> 32);
+    put_le16(p + 103, s->nb_sectors >> 48);
+}
+
 static void ide_identify(IDEState *s)
 {
     uint16_t *p;
@@ -115,8 +126,8 @@ static void ide_identify(IDEState *s)
     put_le16(p + 58, oldsize >> 16);
     if (s->mult_sectors)
         put_le16(p + 59, 0x100 | s->mult_sectors);
-    put_le16(p + 60, s->nb_sectors);
-    put_le16(p + 61, s->nb_sectors >> 16);
+    /* *(p + 60) := nb_sectors       -- see ide_identify_size */
+    /* *(p + 61) := nb_sectors >> 16 -- see ide_identify_size */
     put_le16(p + 62, 0x07); /* single word dma0-2 supported */
     put_le16(p + 63, 0x07); /* mdma0-2 supported */
     put_le16(p + 64, 0x03); /* pio3-4 supported */
@@ -161,10 +172,10 @@ static void ide_identify(IDEState *s)
     }
     put_le16(p + 88, 0x3f | (1 << 13)); /* udma5 set and supported */
     put_le16(p + 93, 1 | (1 << 14) | 0x2000);
-    put_le16(p + 100, s->nb_sectors);
-    put_le16(p + 101, s->nb_sectors >> 16);
-    put_le16(p + 102, s->nb_sectors >> 32);
-    put_le16(p + 103, s->nb_sectors >> 48);
+    /* *(p + 100) := nb_sectors       -- see ide_identify_size */
+    /* *(p + 101) := nb_sectors >> 16 -- see ide_identify_size */
+    /* *(p + 102) := nb_sectors >> 32 -- see ide_identify_size */
+    /* *(p + 103) := nb_sectors >> 48 -- see ide_identify_size */
 
     if (dev && dev->conf.physical_block_size)
         put_le16(p + 106, 0x6000 | get_physical_block_exp(&dev->conf));
@@ -179,6 +190,7 @@ static void ide_identify(IDEState *s)
         put_le16(p + 169, 1); /* TRIM support */
     }
 
+    ide_identify_size(s);
     s->identify_set = 1;
 
 fill_buffer:
@@ -253,6 +265,15 @@ fill_buffer:
     memcpy(s->io_buffer, p, sizeof(s->identify_data));
 }
 
+static void ide_cfata_identify_size(IDEState *s)
+{
+    uint16_t *p = (uint16_t *)s->identify_data;
+    put_le16(p + 7, s->nb_sectors >> 16);  /* Sectors per card */
+    put_le16(p + 8, s->nb_sectors);        /* Sectors per card */
+    put_le16(p + 60, s->nb_sectors);       /* Total LBA sectors */
+    put_le16(p + 61, s->nb_sectors >> 16); /* Total LBA sectors */
+}
+
 static void ide_cfata_identify(IDEState *s)
 {
     uint16_t *p;
@@ -270,8 +291,8 @@ static void ide_cfata_identify(IDEState *s)
     put_le16(p + 1, s->cylinders);		/* Default cylinders */
     put_le16(p + 3, s->heads);			/* Default heads */
     put_le16(p + 6, s->sectors);		/* Default sectors per track */
-    put_le16(p + 7, s->nb_sectors >> 16);	/* Sectors per card */
-    put_le16(p + 8, s->nb_sectors);		/* Sectors per card */
+    /* *(p + 7) := nb_sectors >> 16 -- see ide_cfata_identify_size */
+    /* *(p + 8) := nb_sectors       -- see ide_cfata_identify_size */
     padstr((char *)(p + 10), s->drive_serial_str, 20); /* serial number */
     put_le16(p + 22, 0x0004);			/* ECC bytes */
     padstr((char *) (p + 23), s->version, 8);	/* Firmware Revision */
@@ -292,8 +313,8 @@ static void ide_cfata_identify(IDEState *s)
     put_le16(p + 58, cur_sec >> 16);		/* Current capacity */
     if (s->mult_sectors)			/* Multiple sector setting */
         put_le16(p + 59, 0x100 | s->mult_sectors);
-    put_le16(p + 60, s->nb_sectors);		/* Total LBA sectors */
-    put_le16(p + 61, s->nb_sectors >> 16);	/* Total LBA sectors */
+    /* *(p + 60) := nb_sectors       -- see ide_cfata_identify_size */
+    /* *(p + 61) := nb_sectors >> 16 -- see ide_cfata_identify_size */
     put_le16(p + 63, 0x0203);			/* Multiword DMA capability */
     put_le16(p + 64, 0x0001);			/* Flow Control PIO support */
     put_le16(p + 65, 0x0096);			/* Min. Multiword DMA cycle */
@@ -313,6 +334,7 @@ static void ide_cfata_identify(IDEState *s)
     put_le16(p + 160, 0x8100);			/* Power requirement */
     put_le16(p + 161, 0x8001);			/* CF command set */
 
+    ide_cfata_identify_size(s);
     s->identify_set = 1;
 
 fill_buffer:
@@ -2131,6 +2153,28 @@ static bool ide_cd_is_medium_locked(void *opaque)
     return ((IDEState *)opaque)->tray_locked;
 }
 
+static void ide_resize_cb(void *opaque)
+{
+    IDEState *s = opaque;
+    uint64_t nb_sectors;
+
+    if (!s->identify_set) {
+        return;
+    }
+
+    bdrv_get_geometry(s->bs, &nb_sectors);
+    s->nb_sectors = nb_sectors;
+
+    /* Update the identify data buffer. */
+    if (s->drive_kind == IDE_CFATA) {
+        ide_cfata_identify_size(s);
+    } else {
+        /* IDE_CD uses a different set of callbacks entirely. */
+        assert(s->drive_kind != IDE_CD);
+        ide_identify_size(s);
+    }
+}
+
 static const BlockDevOps ide_cd_block_ops = {
     .change_media_cb = ide_cd_change_cb,
     .eject_request_cb = ide_cd_eject_request_cb,
@@ -2138,6 +2182,10 @@ static const BlockDevOps ide_cd_block_ops = {
     .is_medium_locked = ide_cd_is_medium_locked,
 };
 
+static const BlockDevOps ide_hd_block_ops = {
+    .resize_cb = ide_resize_cb,
+};
+
 int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind,
                    const char *version, const char *serial, const char *model,
                    uint64_t wwn,
@@ -2174,6 +2222,7 @@ int ide_init_drive(IDEState *s, BlockDriverState *bs, IDEDriveKind kind,
             error_report("Can't use a read-only drive");
             return -1;
         }
+        bdrv_set_dev_ops(bs, &ide_hd_block_ops, s);
     }
     if (serial) {
         pstrcpy(s->drive_serial_str, sizeof(s->drive_serial_str), serial);
commit 4bf6637d35723f92e03f427c78d7ad130be41e6f
Author: John Snow <jsnow at redhat.com>
Date:   Thu Sep 4 23:42:16 2014 -0400

    IDE: Fill the IDENTIFY request consistently
    
    IDE-HD, IDE-ATAPI and IDE-CFATA all fill the
    identify buffer in slightly different ways,
    this is a relatively minor patch to make them
    uniform, to emphasize that:
    
    (1) We build the s->identify_data cache first, then
    (2) We copy it to s->io_buffer to fulfill the request.
    
    Signed-off-by: John Snow <jsnow at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/ide/core.c b/hw/ide/core.c
index de0e5e9..e0232d4 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -81,13 +81,12 @@ static void ide_identify(IDEState *s)
     unsigned int oldsize;
     IDEDevice *dev = s->unit ? s->bus->slave : s->bus->master;
 
+    p = (uint16_t *)s->identify_data;
     if (s->identify_set) {
-	memcpy(s->io_buffer, s->identify_data, sizeof(s->identify_data));
-	return;
+        goto fill_buffer;
     }
+    memset(p, 0, sizeof(s->identify_data));
 
-    memset(s->io_buffer, 0, 512);
-    p = (uint16_t *)s->io_buffer;
     put_le16(p + 0, 0x0040);
     put_le16(p + 1, s->cylinders);
     put_le16(p + 3, s->heads);
@@ -180,21 +179,22 @@ static void ide_identify(IDEState *s)
         put_le16(p + 169, 1); /* TRIM support */
     }
 
-    memcpy(s->identify_data, p, sizeof(s->identify_data));
     s->identify_set = 1;
+
+fill_buffer:
+    memcpy(s->io_buffer, p, sizeof(s->identify_data));
 }
 
 static void ide_atapi_identify(IDEState *s)
 {
     uint16_t *p;
 
+    p = (uint16_t *)s->identify_data;
     if (s->identify_set) {
-	memcpy(s->io_buffer, s->identify_data, sizeof(s->identify_data));
-	return;
+        goto fill_buffer;
     }
+    memset(p, 0, sizeof(s->identify_data));
 
-    memset(s->io_buffer, 0, 512);
-    p = (uint16_t *)s->io_buffer;
     /* Removable CDROM, 50us response, 12 byte packets */
     put_le16(p + 0, (2 << 14) | (5 << 8) | (1 << 7) | (2 << 5) | (0 << 0));
     padstr((char *)(p + 10), s->drive_serial_str, 20); /* serial number */
@@ -247,8 +247,10 @@ static void ide_atapi_identify(IDEState *s)
         put_le16(p + 111, s->wwn);
     }
 
-    memcpy(s->identify_data, p, sizeof(s->identify_data));
     s->identify_set = 1;
+
+fill_buffer:
+    memcpy(s->io_buffer, p, sizeof(s->identify_data));
 }
 
 static void ide_cfata_identify(IDEState *s)
@@ -256,10 +258,10 @@ static void ide_cfata_identify(IDEState *s)
     uint16_t *p;
     uint32_t cur_sec;
 
-    p = (uint16_t *) s->identify_data;
-    if (s->identify_set)
+    p = (uint16_t *)s->identify_data;
+    if (s->identify_set) {
         goto fill_buffer;
-
+    }
     memset(p, 0, sizeof(s->identify_data));
 
     cur_sec = s->cylinders * s->heads * s->sectors;
commit b6b1d31f098eef8cd13556d343e46c213fac972a
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Thu Sep 4 21:04:43 2014 +0100

    vmdk: fix buf leak in vmdk_parse_extents()
    
    vmdk_open_sparse() does not take ownership of buf so the caller always
    needs to free it.
    
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>
    Reviewed-by: Max Reitz <mreitz at redhat.com>
    Reviewed-by: Fam Zheng <famz at redhat.com>

diff --git a/block/vmdk.c b/block/vmdk.c
index 9bf28f3..a1cb911 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -846,8 +846,8 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
             } else {
                 ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, buf, errp);
             }
+            g_free(buf);
             if (ret) {
-                g_free(buf);
                 bdrv_unref(extent_file);
                 return ret;
             }
commit ff74f33c310892c90c4439d963a6ce67f47ce18c
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Thu Sep 4 21:04:42 2014 +0100

    vmdk: fix vmdk_parse_extents() extent_file leaks
    
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>
    Reviewed-by: Max Reitz <mreitz at redhat.com>
    Reviewed-by: Fam Zheng <famz at redhat.com>

diff --git a/block/vmdk.c b/block/vmdk.c
index 07cb62c..9bf28f3 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -834,6 +834,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
             ret = vmdk_add_extent(bs, extent_file, true, sectors,
                             0, 0, 0, 0, 0, &extent, errp);
             if (ret < 0) {
+                bdrv_unref(extent_file);
                 return ret;
             }
             extent->flat_start_offset = flat_offset << 9;
@@ -853,6 +854,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
             extent = &s->extents[s->num_extents - 1];
         } else {
             error_setg(errp, "Unsupported extent type '%s'", type);
+            bdrv_unref(extent_file);
             return -ENOTSUP;
         }
         extent->type = g_strdup(type);
commit c5fe97e359bf03db9a005433092f25d27d57398f
Author: John Snow <jsnow at redhat.com>
Date:   Tue Aug 19 14:57:55 2014 -0400

    ide: Add wwn support to IDE-ATAPI drive
    
    Although it is possible to specify the wwn
    property for cdrom devices on the command line,
    the underlying driver fails to relay this information
    to the guest operating system via IDENTIFY.
    
    This is a simple patch to correct that.
    
    See ATA8-ACS, Table 22 parts 5, 6, and 9.
    
    Signed-off-by: John Snow <jsnow at redhat.com>
    Reviewed-by: Fam Zheng <famz at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/ide/core.c b/hw/ide/core.c
index b48127f..de0e5e9 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -230,9 +230,23 @@ static void ide_atapi_identify(IDEState *s)
     }
 
     put_le16(p + 80, 0x1e); /* support up to ATA/ATAPI-4 */
+    if (s->wwn) {
+        put_le16(p + 84, (1 << 8)); /* supports WWN for words 108-111 */
+        put_le16(p + 87, (1 << 8)); /* WWN enabled */
+    }
+
 #ifdef USE_DMA_CDROM
     put_le16(p + 88, 0x3f | (1 << 13)); /* udma5 set and supported */
 #endif
+
+    if (s->wwn) {
+        /* LE 16-bit words 111-108 contain 64-bit World Wide Name */
+        put_le16(p + 108, s->wwn >> 48);
+        put_le16(p + 109, s->wwn >> 32);
+        put_le16(p + 110, s->wwn >> 16);
+        put_le16(p + 111, s->wwn);
+    }
+
     memcpy(s->identify_data, p, sizeof(s->identify_data));
     s->identify_set = 1;
 }
commit 0142f88bff3dd5cb819c9900da1c1e0a4aae9c44
Author: John Snow <jsnow at redhat.com>
Date:   Fri Aug 1 11:38:59 2014 -0400

    qtest/ide: Uninitialize PC allocator
    
    Use the new call to pc_alloc_uninit
    as a test for the new pathways.
    
    The leak checking / assert pathways are
    not enabled in this patch, leaving this
    as an option to future test writers.
    
    Signed-off-by: John Snow <jsnow at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/tests/ide-test.c b/tests/ide-test.c
index ffce6ed..b7a97e9 100644
--- a/tests/ide-test.c
+++ b/tests/ide-test.c
@@ -126,6 +126,8 @@ static void ide_test_start(const char *cmdline_fmt, ...)
 
 static void ide_test_quit(void)
 {
+    pc_alloc_uninit(guest_malloc);
+    guest_malloc = NULL;
     qtest_end();
 }
 
commit ec2f160538a24b72767e2afd1f0ba3085a35b79d
Author: John Snow <jsnow at redhat.com>
Date:   Fri Aug 1 11:38:58 2014 -0400

    libqos: add a simple first-fit memory allocator
    
    Implement a simple first-fit memory allocator that
    attempts to keep track of leased blocks of memory
    in order to be able to re-use blocks.
    
    Additionally, allow the user to specify when
    initializing the device that upon cleanup,
    we would like to assert that there are no
    blocks in use. This may be useful for identifying
    problems in qtests that use more complicated
    set-up and tear-down routines.
    
    This functionality is used in my upcoming ahci-test v2
    patch set, but I didn't see fit to enable it for any
    existing tests, which will continue to operate the
    same as they have prior.
    
    Signed-off-by: John Snow <jsnow at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/tests/libqos/malloc-pc.c b/tests/libqos/malloc-pc.c
index be1d97f..f4218c6 100644
--- a/tests/libqos/malloc-pc.c
+++ b/tests/libqos/malloc-pc.c
@@ -17,45 +17,294 @@
 #include "hw/nvram/fw_cfg.h"
 
 #include "qemu-common.h"
+#include "qemu/queue.h"
 #include <glib.h>
 
 #define PAGE_SIZE (4096)
 
+#define MLIST_ENTNAME entries
+typedef QTAILQ_HEAD(MemList, MemBlock) MemList;
+typedef struct MemBlock {
+    QTAILQ_ENTRY(MemBlock) MLIST_ENTNAME;
+    uint64_t size;
+    uint64_t addr;
+} MemBlock;
+
 typedef struct PCAlloc
 {
     QGuestAllocator alloc;
-
+    PCAllocOpts opts;
     uint64_t start;
     uint64_t end;
+
+    MemList used;
+    MemList free;
 } PCAlloc;
 
-static uint64_t pc_alloc(QGuestAllocator *allocator, size_t size)
+static MemBlock *mlist_new(uint64_t addr, uint64_t size)
 {
-    PCAlloc *s = container_of(allocator, PCAlloc, alloc);
-    uint64_t addr;
+    MemBlock *block;
+
+    if (!size) {
+        return NULL;
+    }
+    block = g_malloc0(sizeof(MemBlock));
 
+    block->addr = addr;
+    block->size = size;
 
-    size += (PAGE_SIZE - 1);
-    size &= -PAGE_SIZE;
+    return block;
+}
+
+static void mlist_delete(MemList *list, MemBlock *node)
+{
+    g_assert(list && node);
+    QTAILQ_REMOVE(list, node, MLIST_ENTNAME);
+    g_free(node);
+}
+
+static MemBlock *mlist_find_key(MemList *head, uint64_t addr)
+{
+    MemBlock *node;
+    QTAILQ_FOREACH(node, head, MLIST_ENTNAME) {
+        if (node->addr == addr) {
+            return node;
+        }
+    }
+    return NULL;
+}
+
+static MemBlock *mlist_find_space(MemList *head, uint64_t size)
+{
+    MemBlock *node;
+
+    QTAILQ_FOREACH(node, head, MLIST_ENTNAME) {
+        if (node->size >= size) {
+            return node;
+        }
+    }
+    return NULL;
+}
+
+static MemBlock *mlist_sort_insert(MemList *head, MemBlock *insr)
+{
+    MemBlock *node;
+    g_assert(head && insr);
+
+    QTAILQ_FOREACH(node, head, MLIST_ENTNAME) {
+        if (insr->addr < node->addr) {
+            QTAILQ_INSERT_BEFORE(node, insr, MLIST_ENTNAME);
+            return insr;
+        }
+    }
+
+    QTAILQ_INSERT_TAIL(head, insr, MLIST_ENTNAME);
+    return insr;
+}
+
+static inline uint64_t mlist_boundary(MemBlock *node)
+{
+    return node->size + node->addr;
+}
+
+static MemBlock *mlist_join(MemList *head, MemBlock *left, MemBlock *right)
+{
+    g_assert(head && left && right);
+
+    left->size += right->size;
+    mlist_delete(head, right);
+    return left;
+}
+
+static void mlist_coalesce(MemList *head, MemBlock *node)
+{
+    g_assert(node);
+    MemBlock *left;
+    MemBlock *right;
+    char merge;
+
+    do {
+        merge = 0;
+        left = QTAILQ_PREV(node, MemList, MLIST_ENTNAME);
+        right = QTAILQ_NEXT(node, MLIST_ENTNAME);
+
+        /* clowns to the left of me */
+        if (left && mlist_boundary(left) == node->addr) {
+            node = mlist_join(head, left, node);
+            merge = 1;
+        }
+
+        /* jokers to the right */
+        if (right && mlist_boundary(node) == right->addr) {
+            node = mlist_join(head, node, right);
+            merge = 1;
+        }
+
+    } while (merge);
+}
+
+static uint64_t pc_mlist_fulfill(PCAlloc *s, MemBlock *freenode, uint64_t size)
+{
+    uint64_t addr;
+    MemBlock *usednode;
 
-    g_assert_cmpint((s->start + size), <=, s->end);
+    g_assert(freenode);
+    g_assert_cmpint(freenode->size, >=, size);
 
-    addr = s->start;
-    s->start += size;
+    addr = freenode->addr;
+    if (freenode->size == size) {
+        /* re-use this freenode as our used node */
+        QTAILQ_REMOVE(&s->free, freenode, MLIST_ENTNAME);
+        usednode = freenode;
+    } else {
+        /* adjust the free node and create a new used node */
+        freenode->addr += size;
+        freenode->size -= size;
+        usednode = mlist_new(addr, size);
+    }
 
+    mlist_sort_insert(&s->used, usednode);
     return addr;
 }
 
+/* To assert the correctness of the list.
+ * Used only if PC_ALLOC_PARANOID is set. */
+static void pc_mlist_check(PCAlloc *s)
+{
+    MemBlock *node;
+    uint64_t addr = s->start > 0 ? s->start - 1 : 0;
+    uint64_t next = s->start;
+
+    QTAILQ_FOREACH(node, &s->free, MLIST_ENTNAME) {
+        g_assert_cmpint(node->addr, >, addr);
+        g_assert_cmpint(node->addr, >=, next);
+        addr = node->addr;
+        next = node->addr + node->size;
+    }
+
+    addr = s->start > 0 ? s->start - 1 : 0;
+    next = s->start;
+    QTAILQ_FOREACH(node, &s->used, MLIST_ENTNAME) {
+        g_assert_cmpint(node->addr, >, addr);
+        g_assert_cmpint(node->addr, >=, next);
+        addr = node->addr;
+        next = node->addr + node->size;
+    }
+}
+
+static uint64_t pc_mlist_alloc(PCAlloc *s, uint64_t size)
+{
+    MemBlock *node;
+
+    node = mlist_find_space(&s->free, size);
+    if (!node) {
+        fprintf(stderr, "Out of guest memory.\n");
+        g_assert_not_reached();
+    }
+    return pc_mlist_fulfill(s, node, size);
+}
+
+static void pc_mlist_free(PCAlloc *s, uint64_t addr)
+{
+    MemBlock *node;
+
+    if (addr == 0) {
+        return;
+    }
+
+    node = mlist_find_key(&s->used, addr);
+    if (!node) {
+        fprintf(stderr, "Error: no record found for an allocation at "
+                "0x%016" PRIx64 ".\n",
+                addr);
+        g_assert_not_reached();
+    }
+
+    /* Rip it out of the used list and re-insert back into the free list. */
+    QTAILQ_REMOVE(&s->used, node, MLIST_ENTNAME);
+    mlist_sort_insert(&s->free, node);
+    mlist_coalesce(&s->free, node);
+}
+
+static uint64_t pc_alloc(QGuestAllocator *allocator, size_t size)
+{
+    PCAlloc *s = container_of(allocator, PCAlloc, alloc);
+    uint64_t rsize = size;
+    uint64_t naddr;
+
+    rsize += (PAGE_SIZE - 1);
+    rsize &= -PAGE_SIZE;
+    g_assert_cmpint((s->start + rsize), <=, s->end);
+    g_assert_cmpint(rsize, >=, size);
+
+    naddr = pc_mlist_alloc(s, rsize);
+    if (s->opts & PC_ALLOC_PARANOID) {
+        pc_mlist_check(s);
+    }
+
+    return naddr;
+}
+
 static void pc_free(QGuestAllocator *allocator, uint64_t addr)
 {
+    PCAlloc *s = container_of(allocator, PCAlloc, alloc);
+
+    pc_mlist_free(s, addr);
+    if (s->opts & PC_ALLOC_PARANOID) {
+        pc_mlist_check(s);
+    }
+}
+
+/*
+ * Mostly for valgrind happiness, but it does offer
+ * a chokepoint for debugging guest memory leaks, too.
+ */
+void pc_alloc_uninit(QGuestAllocator *allocator)
+{
+    PCAlloc *s = container_of(allocator, PCAlloc, alloc);
+    MemBlock *node;
+    MemBlock *tmp;
+    PCAllocOpts mask;
+
+    /* Check for guest leaks, and destroy the list. */
+    QTAILQ_FOREACH_SAFE(node, &s->used, MLIST_ENTNAME, tmp) {
+        if (s->opts & (PC_ALLOC_LEAK_WARN | PC_ALLOC_LEAK_ASSERT)) {
+            fprintf(stderr, "guest malloc leak @ 0x%016" PRIx64 "; "
+                    "size 0x%016" PRIx64 ".\n",
+                    node->addr, node->size);
+        }
+        if (s->opts & (PC_ALLOC_LEAK_ASSERT)) {
+            g_assert_not_reached();
+        }
+        g_free(node);
+    }
+
+    /* If we have previously asserted that there are no leaks, then there
+     * should be only one node here with a specific address and size. */
+    mask = PC_ALLOC_LEAK_ASSERT | PC_ALLOC_PARANOID;
+    QTAILQ_FOREACH_SAFE(node, &s->free, MLIST_ENTNAME, tmp) {
+        if ((s->opts & mask) == mask) {
+            if ((node->addr != s->start) ||
+                (node->size != s->end - s->start)) {
+                fprintf(stderr, "Free list is corrupted.\n");
+                g_assert_not_reached();
+            }
+        }
+
+        g_free(node);
+    }
+
+    g_free(s);
 }
 
-QGuestAllocator *pc_alloc_init(void)
+QGuestAllocator *pc_alloc_init_flags(PCAllocOpts flags)
 {
     PCAlloc *s = g_malloc0(sizeof(*s));
     uint64_t ram_size;
     QFWCFG *fw_cfg = pc_fw_cfg_init();
+    MemBlock *node;
 
+    s->opts = flags;
     s->alloc.alloc = pc_alloc;
     s->alloc.free = pc_free;
 
@@ -70,5 +319,16 @@ QGuestAllocator *pc_alloc_init(void)
     /* clean-up */
     g_free(fw_cfg);
 
+    QTAILQ_INIT(&s->used);
+    QTAILQ_INIT(&s->free);
+
+    node = mlist_new(s->start, s->end - s->start);
+    QTAILQ_INSERT_HEAD(&s->free, node, MLIST_ENTNAME);
+
     return &s->alloc;
 }
+
+inline QGuestAllocator *pc_alloc_init(void)
+{
+    return pc_alloc_init_flags(PC_ALLOC_NO_FLAGS);
+}
diff --git a/tests/libqos/malloc-pc.h b/tests/libqos/malloc-pc.h
index ff964ab..9f525e3 100644
--- a/tests/libqos/malloc-pc.h
+++ b/tests/libqos/malloc-pc.h
@@ -15,6 +15,15 @@
 
 #include "libqos/malloc.h"
 
+typedef enum {
+    PC_ALLOC_NO_FLAGS    = 0x00,
+    PC_ALLOC_LEAK_WARN   = 0x01,
+    PC_ALLOC_LEAK_ASSERT = 0x02,
+    PC_ALLOC_PARANOID    = 0x04
+} PCAllocOpts;
+
 QGuestAllocator *pc_alloc_init(void);
+QGuestAllocator *pc_alloc_init_flags(PCAllocOpts flags);
+void             pc_alloc_uninit(QGuestAllocator *allocator);
 
 #endif
commit 53b33231f76e85984eb18a4c63fd132d0d4e3f40
Author: MORITA Kazutaka <morita.kazutaka at gmail.com>
Date:   Wed Sep 3 22:13:17 2014 +0900

    MAINTAINERS: update sheepdog maintainer
    
    Hitoshi takes over sheepdog maintenance from me.
    
    Signed-off-by: MORITA Kazutaka <morita.kazutaka at gmail.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/MAINTAINERS b/MAINTAINERS
index 8622e01..206bf7e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -980,7 +980,7 @@ S: Supported
 F: block/rbd.c
 
 Sheepdog
-M: MORITA Kazutaka <morita.kazutaka at lab.ntt.co.jp>
+M: Hitoshi Mitake <mitake.hitoshi at lab.ntt.co.jp>
 M: Liu Yuan <namei.unix at gmail.com>
 L: sheepdog at lists.wpkg.org
 S: Supported
commit 713cc671f107eb8f6ced6ff1e7cf088029619731
Author: Peter Lieven <pl at kamp.de>
Date:   Wed Aug 13 19:20:19 2014 +0200

    qemu-nbd: fix indentation and coding style
    
    Signed-off-by: Peter Lieven <pl at kamp.de>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Reviewed-by: Benoit Canet <benoit.canet at nodalink.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/qemu-nbd.c b/qemu-nbd.c
index 6ef8b10..9bc152e 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -39,10 +39,10 @@
 #include <libgen.h>
 #include <pthread.h>
 
-#define SOCKET_PATH          "/var/lock/qemu-nbd-%s"
-#define QEMU_NBD_OPT_CACHE   1
-#define QEMU_NBD_OPT_AIO     2
-#define QEMU_NBD_OPT_DISCARD 3
+#define SOCKET_PATH                "/var/lock/qemu-nbd-%s"
+#define QEMU_NBD_OPT_CACHE         1
+#define QEMU_NBD_OPT_AIO           2
+#define QEMU_NBD_OPT_DISCARD       3
 #define QEMU_NBD_OPT_DETECT_ZEROES 4
 
 static NBDExport *exp;
@@ -60,44 +60,44 @@ static void usage(const char *name)
 "Usage: %s [OPTIONS] FILE\n"
 "QEMU Disk Network Block Device Server\n"
 "\n"
-"  -h, --help           display this help and exit\n"
-"  -V, --version        output version information and exit\n"
+"  -h, --help                display this help and exit\n"
+"  -V, --version             output version information and exit\n"
 "\n"
 "Connection properties:\n"
-"  -p, --port=PORT      port to listen on (default `%d')\n"
-"  -b, --bind=IFACE     interface to bind to (default `0.0.0.0')\n"
-"  -k, --socket=PATH    path to the unix socket\n"
-"                       (default '"SOCKET_PATH"')\n"
-"  -e, --shared=NUM     device can be shared by NUM clients (default '1')\n"
-"  -t, --persistent     don't exit on the last connection\n"
-"  -v, --verbose        display extra debugging information\n"
+"  -p, --port=PORT           port to listen on (default `%d')\n"
+"  -b, --bind=IFACE          interface to bind to (default `0.0.0.0')\n"
+"  -k, --socket=PATH         path to the unix socket\n"
+"                            (default '"SOCKET_PATH"')\n"
+"  -e, --shared=NUM          device can be shared by NUM clients (default '1')\n"
+"  -t, --persistent          don't exit on the last connection\n"
+"  -v, --verbose             display extra debugging information\n"
 "\n"
 "Exposing part of the image:\n"
-"  -o, --offset=OFFSET  offset into the image\n"
-"  -P, --partition=NUM  only expose partition NUM\n"
+"  -o, --offset=OFFSET       offset into the image\n"
+"  -P, --partition=NUM       only expose partition NUM\n"
 "\n"
 #ifdef __linux__
 "Kernel NBD client support:\n"
-"  -c, --connect=DEV    connect FILE to the local NBD device DEV\n"
-"  -d, --disconnect     disconnect the specified device\n"
+"  -c, --connect=DEV         connect FILE to the local NBD device DEV\n"
+"  -d, --disconnect          disconnect the specified device\n"
 "\n"
 #endif
 "\n"
 "Block device options:\n"
-"  -f, --format=FORMAT  set image format (raw, qcow2, ...)\n"
-"  -r, --read-only      export read-only\n"
-"  -s, --snapshot       use FILE as an external snapshot, create a temporary\n"
-"                       file with backing_file=FILE, redirect the write to\n"
-"                       the temporary one\n"
+"  -f, --format=FORMAT       set image format (raw, qcow2, ...)\n"
+"  -r, --read-only           export read-only\n"
+"  -s, --snapshot            use FILE as an external snapshot, create a temporary\n"
+"                            file with backing_file=FILE, redirect the write to\n"
+"                            the temporary one\n"
 "  -l, --load-snapshot=SNAPSHOT_PARAM\n"
-"                       load an internal snapshot inside FILE and export it\n"
-"                       as an read-only device, SNAPSHOT_PARAM format is\n"
-"                       'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
-"                       '[ID_OR_NAME]'\n"
-"  -n, --nocache        disable host cache\n"
-"      --cache=MODE     set cache mode (none, writeback, ...)\n"
+"                            load an internal snapshot inside FILE and export it\n"
+"                            as an read-only device, SNAPSHOT_PARAM format is\n"
+"                            'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
+"                            '[ID_OR_NAME]'\n"
+"  -n, --nocache             disable host cache\n"
+"      --cache=MODE          set cache mode (none, writeback, ...)\n"
 #ifdef CONFIG_LINUX_AIO
-"      --aio=MODE       set AIO mode (native or threads)\n"
+"      --aio=MODE            set AIO mode (native or threads)\n"
 #endif
 "      --discard=MODE        set discard mode (ignore, unmap)\n"
 "      --detect-zeroes=MODE  set detect-zeroes mode (off, on, discard)\n"
@@ -546,15 +546,18 @@ int main(int argc, char **argv)
             break;
         case 'P':
             partition = strtol(optarg, &end, 0);
-            if (*end)
+            if (*end) {
                 errx(EXIT_FAILURE, "Invalid partition `%s'", optarg);
-            if (partition < 1 || partition > 8)
+            }
+            if (partition < 1 || partition > 8) {
                 errx(EXIT_FAILURE, "Invalid partition %d", partition);
+            }
             break;
         case 'k':
             sockpath = optarg;
-            if (sockpath[0] != '/')
+            if (sockpath[0] != '/') {
                 errx(EXIT_FAILURE, "socket path must be absolute\n");
+            }
             break;
         case 'd':
             disconnect = true;
@@ -574,9 +577,9 @@ int main(int argc, char **argv)
         case 'f':
             fmt = optarg;
             break;
-	case 't':
-	    persistent = 1;
-	    break;
+        case 't':
+            persistent = 1;
+            break;
         case 'v':
             verbose = 1;
             break;
@@ -611,7 +614,7 @@ int main(int argc, char **argv)
 
         printf("%s disconnected\n", argv[optind]);
 
-	return 0;
+        return 0;
     }
 
     if (device && !verbose) {
commit b3838a4088865d1767e137d2b7974b6885822eb2
Author: Peter Lieven <pl at kamp.de>
Date:   Wed Aug 13 19:20:18 2014 +0200

    qemu-nbd: add option to set detect-zeroes mode
    
    Signed-off-by: Peter Lieven <pl at kamp.de>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Reviewed-by: Benoit Canet <benoit.canet at nodalink.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/qemu-nbd.c b/qemu-nbd.c
index 626e584..6ef8b10 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -18,11 +18,13 @@
 
 #include "qemu-common.h"
 #include "block/block.h"
+#include "block/block_int.h"
 #include "block/nbd.h"
 #include "qemu/main-loop.h"
 #include "qemu/sockets.h"
 #include "qemu/error-report.h"
 #include "block/snapshot.h"
+#include "qapi/util.h"
 
 #include <stdarg.h>
 #include <stdio.h>
@@ -41,6 +43,7 @@
 #define QEMU_NBD_OPT_CACHE   1
 #define QEMU_NBD_OPT_AIO     2
 #define QEMU_NBD_OPT_DISCARD 3
+#define QEMU_NBD_OPT_DETECT_ZEROES 4
 
 static NBDExport *exp;
 static int verbose;
@@ -96,6 +99,8 @@ static void usage(const char *name)
 #ifdef CONFIG_LINUX_AIO
 "      --aio=MODE       set AIO mode (native or threads)\n"
 #endif
+"      --discard=MODE        set discard mode (ignore, unmap)\n"
+"      --detect-zeroes=MODE  set detect-zeroes mode (off, on, discard)\n"
 "\n"
 "Report bugs to <qemu-devel at nongnu.org>\n"
     , name, NBD_DEFAULT_PORT, "DEVICE");
@@ -410,6 +415,7 @@ int main(int argc, char **argv)
         { "aio", 1, NULL, QEMU_NBD_OPT_AIO },
 #endif
         { "discard", 1, NULL, QEMU_NBD_OPT_DISCARD },
+        { "detect-zeroes", 1, NULL, QEMU_NBD_OPT_DETECT_ZEROES },
         { "shared", 1, NULL, 'e' },
         { "format", 1, NULL, 'f' },
         { "persistent", 0, NULL, 't' },
@@ -432,6 +438,7 @@ int main(int argc, char **argv)
     pthread_t client_thread;
     const char *fmt = NULL;
     Error *local_err = NULL;
+    BlockdevDetectZeroesOptions detect_zeroes = BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
 
     /* The client thread uses SIGTERM to interrupt the server.  A signal
      * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
@@ -483,6 +490,23 @@ int main(int argc, char **argv)
                 errx(EXIT_FAILURE, "Invalid discard mode `%s'", optarg);
             }
             break;
+        case QEMU_NBD_OPT_DETECT_ZEROES:
+            detect_zeroes =
+                qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
+                                optarg,
+                                BLOCKDEV_DETECT_ZEROES_OPTIONS_MAX,
+                                BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
+                                &local_err);
+            if (local_err) {
+                errx(EXIT_FAILURE, "Failed to parse detect_zeroes mode: %s", 
+                     error_get_pretty(local_err));
+            }
+            if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
+                !(flags & BDRV_O_UNMAP)) {
+                errx(EXIT_FAILURE, "setting detect-zeroes to unmap is not allowed "
+                                   "without setting discard operation to unmap"); 
+            }
+            break;
         case 'b':
             bindto = optarg;
             break;
@@ -686,6 +710,7 @@ int main(int argc, char **argv)
             error_get_pretty(local_err));
     }
 
+    bs->detect_zeroes = detect_zeroes;
     fd_size = bdrv_getlength(bs);
 
     if (partition != -1) {
commit 9e7dac7c6c6003ad9d4aca0125f0278233fcf761
Author: Peter Lieven <pl at kamp.de>
Date:   Wed Aug 13 19:20:17 2014 +0200

    rename parse_enum_option to qapi_enum_parse and make it public
    
    relaxing the license to LGPLv2+ is intentional.
    
    Suggested-by: Markus Armbruster <armbru at redhat.com>
    Signed-off-by: Hu Tao <hutao at cn.fujitsu.com>
    Signed-off-by: Peter Lieven <pl at kamp.de>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Reviewed-by: Benoit Canet <benoit.canet at nodalink.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/blockdev.c b/blockdev.c
index e37b068..e919566 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -39,6 +39,7 @@
 #include "qapi/qmp/types.h"
 #include "qapi-visit.h"
 #include "qapi/qmp-output-visitor.h"
+#include "qapi/util.h"
 #include "sysemu/sysemu.h"
 #include "block/block_int.h"
 #include "qmp-commands.h"
@@ -274,25 +275,6 @@ static int parse_block_error_action(const char *buf, bool is_read, Error **errp)
     }
 }
 
-static inline int parse_enum_option(const char *lookup[], const char *buf,
-                                    int max, int def, Error **errp)
-{
-    int i;
-
-    if (!buf) {
-        return def;
-    }
-
-    for (i = 0; i < max; i++) {
-        if (!strcmp(buf, lookup[i])) {
-            return i;
-        }
-    }
-
-    error_setg(errp, "invalid parameter value: %s", buf);
-    return def;
-}
-
 static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
 {
     if (throttle_conflicting(cfg)) {
@@ -456,11 +438,11 @@ static DriveInfo *blockdev_init(const char *file, QDict *bs_opts,
     }
 
     detect_zeroes =
-        parse_enum_option(BlockdevDetectZeroesOptions_lookup,
-                          qemu_opt_get(opts, "detect-zeroes"),
-                          BLOCKDEV_DETECT_ZEROES_OPTIONS_MAX,
-                          BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
-                          &error);
+        qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
+                        qemu_opt_get(opts, "detect-zeroes"),
+                        BLOCKDEV_DETECT_ZEROES_OPTIONS_MAX,
+                        BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
+                        &error);
     if (error) {
         error_propagate(errp, error);
         goto early_err;
diff --git a/include/qapi/util.h b/include/qapi/util.h
new file mode 100644
index 0000000..de9238b
--- /dev/null
+++ b/include/qapi/util.h
@@ -0,0 +1,17 @@
+/*
+ * QAPI util functions
+ *
+ * Copyright Fujitsu, Inc. 2014
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#ifndef QAPI_UTIL_H
+#define QAPI_UTIL_H
+
+int qapi_enum_parse(const char *lookup[], const char *buf,
+                    int max, int def, Error **errp);
+
+#endif
diff --git a/qapi/Makefile.objs b/qapi/Makefile.objs
index d14b769..2278970 100644
--- a/qapi/Makefile.objs
+++ b/qapi/Makefile.objs
@@ -1,6 +1,6 @@
 util-obj-y = qapi-visit-core.o qapi-dealloc-visitor.o qmp-input-visitor.o
 util-obj-y += qmp-output-visitor.o qmp-registry.o qmp-dispatch.o
 util-obj-y += string-input-visitor.o string-output-visitor.o
-
 util-obj-y += opts-visitor.o
 util-obj-y += qmp-event.o
+util-obj-y += qapi-util.o
diff --git a/qapi/qapi-util.c b/qapi/qapi-util.c
new file mode 100644
index 0000000..1d8fb96
--- /dev/null
+++ b/qapi/qapi-util.c
@@ -0,0 +1,34 @@
+/*
+ * QAPI util functions
+ *
+ * Authors:
+ *  Hu Tao       <hutao at cn.fujitsu.com>
+ *  Peter Lieven <pl at kamp.de>
+ * 
+ * This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "qapi/util.h"
+
+int qapi_enum_parse(const char *lookup[], const char *buf,
+                    int max, int def, Error **errp)
+{
+    int i;
+
+    if (!buf) {
+        return def;
+    }
+
+    for (i = 0; i < max; i++) {
+        if (!strcmp(buf, lookup[i])) {
+            return i;
+        }
+    }
+
+    error_setg(errp, "invalid parameter value: %s", buf);
+    return def;
+}
commit 072f9ac44aac54ee9ac006e8b42841abfde48a8b
Author: Chrysostomos Nanakos <cnanakos at grnet.gr>
Date:   Tue Sep 2 15:41:34 2014 +0300

    block/archipelago: Use QEMU atomic builtins
    
    Replace __sync builtins with ones provided by QEMU
    for atomic operations.
    
    Special thanks goes to Paolo Bonzini for his refactoring
    suggestion in order to use the already existing atomic builtins
    interface.
    
    Signed-off-by: Chrysostomos Nanakos <cnanakos at grnet.gr>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block/archipelago.c b/block/archipelago.c
index 34f72dc..22a7daa 100644
--- a/block/archipelago.c
+++ b/block/archipelago.c
@@ -57,6 +57,7 @@
 #include "qapi/qmp/qint.h"
 #include "qapi/qmp/qstring.h"
 #include "qapi/qmp/qjson.h"
+#include "qemu/atomic.h"
 
 #include <inttypes.h>
 #include <xseg/xseg.h>
@@ -214,7 +215,7 @@ static void xseg_request_handler(void *state)
 
                 xseg_put_request(s->xseg, req, s->srcport);
 
-                if ((__sync_add_and_fetch(&segreq->ref, -1)) == 0) {
+                if (atomic_fetch_dec(&segreq->ref) == 1) {
                     if (!segreq->failed) {
                         reqdata->aio_cb->ret = segreq->count;
                         archipelago_finish_aiocb(reqdata);
@@ -233,7 +234,7 @@ static void xseg_request_handler(void *state)
                 segreq->count += req->serviced;
                 xseg_put_request(s->xseg, req, s->srcport);
 
-                if ((__sync_add_and_fetch(&segreq->ref, -1)) == 0) {
+                if (atomic_fetch_dec(&segreq->ref) == 1) {
                     if (!segreq->failed) {
                         reqdata->aio_cb->ret = segreq->count;
                         archipelago_finish_aiocb(reqdata);
@@ -824,78 +825,47 @@ static int archipelago_aio_segmented_rw(BDRVArchipelagoState *s,
                                         ArchipelagoAIOCB *aio_cb,
                                         int op)
 {
-    int i, ret, segments_nr, last_segment_size;
+    int ret, segments_nr;
+    size_t pos = 0;
     ArchipelagoSegmentedRequest *segreq;
 
-    segreq = g_new(ArchipelagoSegmentedRequest, 1);
+    segreq = g_new0(ArchipelagoSegmentedRequest, 1);
 
     if (op == ARCHIP_OP_FLUSH) {
         segments_nr = 1;
-        segreq->ref = segments_nr;
-        segreq->total = count;
-        segreq->count = 0;
-        segreq->failed = 0;
-        ret = archipelago_submit_request(s, 0, count, offset, aio_cb,
-                                           segreq, ARCHIP_OP_FLUSH);
-        if (ret < 0) {
-            goto err_exit;
-        }
-        return 0;
+    } else {
+        segments_nr = (int)(count / MAX_REQUEST_SIZE) + \
+                      ((count % MAX_REQUEST_SIZE) ? 1 : 0);
     }
-
-    segments_nr = (int)(count / MAX_REQUEST_SIZE) + \
-                  ((count % MAX_REQUEST_SIZE) ? 1 : 0);
-    last_segment_size = (int)(count % MAX_REQUEST_SIZE);
-
-    segreq->ref = segments_nr;
     segreq->total = count;
-    segreq->count = 0;
-    segreq->failed = 0;
+    atomic_mb_set(&segreq->ref, segments_nr);
 
-    for (i = 0; i < segments_nr - 1; i++) {
-        ret = archipelago_submit_request(s, i * MAX_REQUEST_SIZE,
-                                           MAX_REQUEST_SIZE,
-                                           offset + i * MAX_REQUEST_SIZE,
-                                           aio_cb, segreq, op);
+    while (segments_nr > 1) {
+        ret = archipelago_submit_request(s, pos,
+                                            MAX_REQUEST_SIZE,
+                                            offset + pos,
+                                            aio_cb, segreq, op);
 
         if (ret < 0) {
             goto err_exit;
         }
+        count -= MAX_REQUEST_SIZE;
+        pos += MAX_REQUEST_SIZE;
+        segments_nr--;
     }
-
-    if ((segments_nr > 1) && last_segment_size) {
-        ret = archipelago_submit_request(s, i * MAX_REQUEST_SIZE,
-                                           last_segment_size,
-                                           offset + i * MAX_REQUEST_SIZE,
-                                           aio_cb, segreq, op);
-    } else if ((segments_nr > 1) && !last_segment_size) {
-        ret = archipelago_submit_request(s, i * MAX_REQUEST_SIZE,
-                                           MAX_REQUEST_SIZE,
-                                           offset + i * MAX_REQUEST_SIZE,
-                                           aio_cb, segreq, op);
-    } else if (segments_nr == 1) {
-            ret = archipelago_submit_request(s, 0, count, offset, aio_cb,
-                                               segreq, op);
-    }
+    ret = archipelago_submit_request(s, pos, count, offset + pos,
+                                     aio_cb, segreq, op);
 
     if (ret < 0) {
         goto err_exit;
     }
-
     return 0;
 
 err_exit:
-    __sync_add_and_fetch(&segreq->failed, 1);
-    if (segments_nr == 1) {
-        if (__sync_add_and_fetch(&segreq->ref, -1) == 0) {
-            g_free(segreq);
-        }
-    } else {
-        if ((__sync_add_and_fetch(&segreq->ref, -segments_nr + i)) == 0) {
-            g_free(segreq);
-        }
+    segreq->failed = 1;
+    if (atomic_fetch_sub(&segreq->ref, segments_nr) == segments_nr) {
+        g_free(segreq);
     }
-
     return ret;
 }
 
commit 3ba6796d080a90440573ef29d657e4902be7e238
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Tue Sep 2 11:01:03 2014 +0100

    qemu-img: fix rebase src_cache option documentation
    
    The src_cache option (-T) specifies the cache mode for backing files.
    It applies both the image's old backing file as well as the new backing
    file:
    
      ret = bdrv_open(&bs_old_backing, backing_name, NULL, NULL, src_flags,
                      old_backing_drv, &local_err);
      if (ret) {
          ...
      }
      if (out_baseimg[0]) {
          bs_new_backing = bdrv_new("new_backing", &error_abort);
          ret = bdrv_open(&bs_new_backing, out_baseimg, NULL, NULL, src_flags,
                          new_backing_drv, &local_err);
          if (ret) {
              ...
          }
      }
    
    The documentation only mentions the new backing file but it really
    applies to both.
    
    Suggested-by: Jeff Nelson <jenelson at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>
    Reviewed-by: Max Reitz <mreitz at redhat.com>

diff --git a/qemu-img.texi b/qemu-img.texi
index 4380d56..cc4668e 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -341,7 +341,7 @@ string), then the image is rebased onto no backing file (i.e. it will exist
 independently of any backing file).
 
 @var{cache} specifies the cache mode to be used for @var{filename}, whereas
- at var{src_cache} specifies the cache mode for reading the new backing file.
+ at var{src_cache} specifies the cache mode for reading backing files.
 
 There are two different modes in which @code{rebase} can operate:
 @table @option
commit bb87fdf871d321895b8f5c481977df7a3f74a765
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Tue Sep 2 11:01:02 2014 +0100

    qemu-img: clarify src_cache option documentation
    
    The source cache option takes the same values as the cache option.  The
    documentation reads a little strange because it starts with "In contrast
    the src_cache option ...".  The fact that this is comparing with the
    previous documented option (the 'cache' option) is implicit.  Readers
    may be confused, especially if they jump to src_cache without reading
    cache documentation first.
    
    Suggested-by: Jeff Nelson <jenelson at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>
    Reviewed-by: Max Reitz <mreitz at redhat.com>

diff --git a/qemu-img.c b/qemu-img.c
index ff29ed1..91d1ac3 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -95,7 +95,8 @@ static void QEMU_NORETURN help(void)
            "  'cache' is the cache mode used to write the output disk image, the valid\n"
            "    options are: 'none', 'writeback' (default, except for convert), 'writethrough',\n"
            "    'directsync' and 'unsafe' (default for convert)\n"
-           "  'src_cache' in contrast is the cache mode used to read input disk images\n"
+           "  'src_cache' is the cache mode used to read input disk images, the valid\n"
+           "    options are the same as for the 'cache' option\n"
            "  'size' is the disk image size in bytes. Optional suffixes\n"
            "    'k' or 'K' (kilobyte, 1024), 'M' (megabyte, 1024k), 'G' (gigabyte, 1024M),\n"
            "    'T' (terabyte, 1024G), 'P' (petabyte, 1024T) and 'E' (exabyte, 1024P)  are\n"
diff --git a/qemu-img.texi b/qemu-img.texi
index cb68948..4380d56 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -73,8 +73,9 @@ specifies the cache mode that should be used with the (destination) file. See
 the documentation of the emulator's @code{-drive cache=...} option for allowed
 values.
 @item -T @var{src_cache}
-in contrast specifies the cache mode that should be used with the source
-file(s).
+specifies the cache mode that should be used with the source file(s). See
+the documentation of the emulator's @code{-drive cache=...} option for allowed
+values.
 @end table
 
 Parameters to snapshot subcommand:
commit 1053587c3fb50fb78e18a2e32b90e272c1796de0
Author: Marc MarÃ <marc.mari.barcelo at gmail.com>
Date:   Mon Sep 1 12:08:00 2014 +0200

    libqos: Added EVENT_IDX support
    
    Added avail_event and NO_NOTIFY check before notifying.
    Added used_event setting.
    
    Signed-off-by: Marc MarÃ <marc.mari.barcelo at gmail.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c
index ab28717..788ebaf 100644
--- a/tests/libqos/virtio-pci.c
+++ b/tests/libqos/virtio-pci.c
@@ -203,6 +203,7 @@ static QVirtQueue *qvirtio_pci_virtqueue_setup(QVirtioDevice *d,
     vqpci->vq.num_free = vqpci->vq.size;
     vqpci->vq.align = QVIRTIO_PCI_ALIGN;
     vqpci->vq.indirect = (feat & QVIRTIO_F_RING_INDIRECT_DESC) != 0;
+    vqpci->vq.event = (feat & QVIRTIO_F_RING_EVENT_IDX) != 0;
 
     vqpci->msix_entry = -1;
     vqpci->msix_addr = 0;
diff --git a/tests/libqos/virtio.c b/tests/libqos/virtio.c
index 16eaf79..128dbd0 100644
--- a/tests/libqos/virtio.c
+++ b/tests/libqos/virtio.c
@@ -124,9 +124,13 @@ void qvring_init(const QGuestAllocator *alloc, QVirtQueue *vq, uint64_t addr)
     writew(vq->avail, 0);
     /* vq->avail->idx */
     writew(vq->avail + 2, 0);
+    /* vq->avail->used_event */
+    writew(vq->avail + 4 + (2 * vq->size), 0);
 
     /* vq->used->flags */
     writew(vq->used, 0);
+    /* vq->used->avail_event */
+    writew(vq->used+2+(sizeof(struct QVRingUsedElem)*vq->size), 0);
 }
 
 QVRingIndirectDesc *qvring_indirect_desc_setup(QVirtioDevice *d,
@@ -222,11 +226,32 @@ void qvirtqueue_kick(const QVirtioBus *bus, QVirtioDevice *d, QVirtQueue *vq,
 {
     /* vq->avail->idx */
     uint16_t idx = readl(vq->avail + 2);
+    /* vq->used->flags */
+    uint16_t flags;
+    /* vq->used->avail_event */
+    uint16_t avail_event;
 
     /* vq->avail->ring[idx % vq->size] */
     writel(vq->avail + 4 + (2 * (idx % vq->size)), free_head);
     /* vq->avail->idx */
     writel(vq->avail + 2, idx + 1);
 
-    bus->virtqueue_kick(d, vq);
+    /* Must read after idx is updated */
+    flags = readw(vq->avail);
+    avail_event = readw(vq->used + 4 +
+                                (sizeof(struct QVRingUsedElem) * vq->size));
+
+    /* < 1 because we add elements to avail queue one by one */
+    if ((flags & QVRING_USED_F_NO_NOTIFY) == 0 &&
+                            (!vq->event || (uint16_t)(idx-avail_event) < 1)) {
+        bus->virtqueue_kick(d, vq);
+    }
+}
+
+void qvirtqueue_set_used_event(QVirtQueue *vq, uint16_t idx)
+{
+    g_assert(vq->event);
+
+    /* vq->avail->used_event */
+    writew(vq->avail + 4 + (2 * vq->size), idx);
 }
diff --git a/tests/libqos/virtio.h b/tests/libqos/virtio.h
index cebccd2..70b3376 100644
--- a/tests/libqos/virtio.h
+++ b/tests/libqos/virtio.h
@@ -26,6 +26,7 @@
 #define QVIRTIO_F_ANY_LAYOUT            0x08000000
 #define QVIRTIO_F_RING_INDIRECT_DESC    0x10000000
 #define QVIRTIO_F_RING_EVENT_IDX        0x20000000
+#define QVIRTIO_F_BAD_FEATURE           0x40000000
 
 #define QVRING_DESC_F_NEXT      0x1
 #define QVRING_DESC_F_WRITE     0x2
@@ -57,6 +58,7 @@ typedef struct QVRingAvail {
     uint16_t flags;
     uint16_t idx;
     uint16_t ring[0]; /* This is an array of uint16_t */
+    uint16_t used_event;
 } QVRingAvail;
 
 typedef struct QVRingUsedElem {
@@ -68,6 +70,7 @@ typedef struct QVRingUsed {
     uint16_t flags;
     uint16_t idx;
     QVRingUsedElem ring[0]; /* This is an array of QVRingUsedElem structs */
+    uint16_t avail_event;
 } QVRingUsed;
 
 typedef struct QVirtQueue {
@@ -80,6 +83,7 @@ typedef struct QVirtQueue {
     uint32_t num_free;
     uint32_t align;
     bool indirect;
+    bool event;
 } QVirtQueue;
 
 typedef struct QVRingIndirectDesc {
@@ -174,4 +178,5 @@ uint32_t qvirtqueue_add_indirect(QVirtQueue *vq, QVRingIndirectDesc *indirect);
 void qvirtqueue_kick(const QVirtioBus *bus, QVirtioDevice *d, QVirtQueue *vq,
                                                             uint32_t free_head);
 
+void qvirtqueue_set_used_event(QVirtQueue *vq, uint16_t idx);
 #endif
diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
index 0100aaa..588666c 100644
--- a/tests/virtio-blk-test.c
+++ b/tests/virtio-blk-test.c
@@ -496,6 +496,129 @@ static void pci_msix(void)
 
     qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head);
 
+
+    g_assert(qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq,
+                                                        QVIRTIO_BLK_TIMEOUT));
+
+    status = readb(req_addr + 528);
+    g_assert_cmpint(status, ==, 0);
+
+    data = g_malloc0(512);
+    memread(req_addr + 16, data, 512);
+    g_assert_cmpstr(data, ==, "TEST");
+    g_free(data);
+
+    guest_free(alloc, req_addr);
+
+    /* End test */
+    guest_free(alloc, (uint64_t)vqpci->vq.desc);
+    qpci_msix_disable(dev->pdev);
+    qvirtio_pci_device_disable(dev);
+    g_free(dev);
+    test_end();
+}
+
+static void pci_idx(void)
+{
+    QVirtioPCIDevice *dev;
+    QPCIBus *bus;
+    QVirtQueuePCI *vqpci;
+    QGuestAllocator *alloc;
+    QVirtioBlkReq req;
+    void *addr;
+    uint64_t req_addr;
+    uint64_t capacity;
+    uint32_t features;
+    uint32_t free_head;
+    uint8_t status;
+    char *data;
+
+    bus = test_start();
+    alloc = pc_alloc_init();
+
+    dev = virtio_blk_init(bus);
+    qpci_msix_enable(dev->pdev);
+
+    qvirtio_pci_set_msix_configuration_vector(dev, alloc, 0);
+
+    /* MSI-X is enabled */
+    addr = dev->addr + QVIRTIO_DEVICE_SPECIFIC_MSIX;
+
+    capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, addr);
+    g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512);
+
+    features = qvirtio_get_features(&qvirtio_pci, &dev->vdev);
+    features = features & ~(QVIRTIO_F_BAD_FEATURE |
+                            QVIRTIO_F_RING_INDIRECT_DESC |
+                            QVIRTIO_F_NOTIFY_ON_EMPTY | QVIRTIO_BLK_F_SCSI);
+    qvirtio_set_features(&qvirtio_pci, &dev->vdev, features);
+
+    vqpci = (QVirtQueuePCI *)qvirtqueue_setup(&qvirtio_pci, &dev->vdev,
+                                                                    alloc, 0);
+    qvirtqueue_pci_msix_setup(dev, vqpci, alloc, 1);
+
+    qvirtio_set_driver_ok(&qvirtio_pci, &dev->vdev);
+
+    /* Write request */
+    req.type = QVIRTIO_BLK_T_OUT;
+    req.ioprio = 1;
+    req.sector = 0;
+    req.data = g_malloc0(512);
+    strcpy(req.data, "TEST");
+
+    req_addr = virtio_blk_request(alloc, &req, 512);
+
+    g_free(req.data);
+
+    free_head = qvirtqueue_add(&vqpci->vq, req_addr, 528, false, true);
+    qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false);
+    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head);
+
+    g_assert(qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq,
+                                                        QVIRTIO_BLK_TIMEOUT));
+
+    /* Write request */
+    req.type = QVIRTIO_BLK_T_OUT;
+    req.ioprio = 1;
+    req.sector = 1;
+    req.data = g_malloc0(512);
+    strcpy(req.data, "TEST");
+
+    req_addr = virtio_blk_request(alloc, &req, 512);
+
+    g_free(req.data);
+
+    /* Notify after processing the third request */
+    qvirtqueue_set_used_event(&vqpci->vq, 2);
+    free_head = qvirtqueue_add(&vqpci->vq, req_addr, 528, false, true);
+    qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false);
+    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head);
+
+    /* No notification expected */
+    g_assert(!qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq,
+                                                        QVIRTIO_BLK_TIMEOUT));
+
+    status = readb(req_addr + 528);
+    g_assert_cmpint(status, ==, 0);
+
+    guest_free(alloc, req_addr);
+
+    /* Read request */
+    req.type = QVIRTIO_BLK_T_IN;
+    req.ioprio = 1;
+    req.sector = 1;
+    req.data = g_malloc0(512);
+
+    req_addr = virtio_blk_request(alloc, &req, 512);
+
+    g_free(req.data);
+
+    free_head = qvirtqueue_add(&vqpci->vq, req_addr, 16, false, true);
+    qvirtqueue_add(&vqpci->vq, req_addr + 16, 513, true, false);
+
+    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head);
+
+
     g_assert(qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq,
                                                         QVIRTIO_BLK_TIMEOUT));
 
@@ -527,6 +650,7 @@ int main(int argc, char **argv)
     g_test_add_func("/virtio/blk/pci/indirect", pci_indirect);
     g_test_add_func("/virtio/blk/pci/config", pci_config);
     g_test_add_func("/virtio/blk/pci/msix", pci_msix);
+    g_test_add_func("/virtio/blk/pci/idx", pci_idx);
 
     ret = g_test_run();
 
commit 58368113989403775496b3422f22094713703157
Author: Marc MarÃ <marc.mari.barcelo at gmail.com>
Date:   Mon Sep 1 12:07:59 2014 +0200

    libqos: Added MSI-X support
    
    Added MSI-X support for qtest PCI.
    Added MSI-X support for virtio-pci.
    Added MSI-X test case in virtio-blk-test.
    
    Signed-off-by: Marc MarÃ <marc.mari.barcelo at gmail.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/tests/libqos/pci.c b/tests/libqos/pci.c
index ce0b308..d5ce683 100644
--- a/tests/libqos/pci.c
+++ b/tests/libqos/pci.c
@@ -15,8 +15,6 @@
 #include "hw/pci/pci_regs.h"
 #include <glib.h>
 
-#include <stdio.h>
-
 void qpci_device_foreach(QPCIBus *bus, int vendor_id, int device_id,
                          void (*func)(QPCIDevice *dev, int devfn, void *data),
                          void *data)
@@ -75,6 +73,115 @@ void qpci_device_enable(QPCIDevice *dev)
     qpci_config_writew(dev, PCI_COMMAND, cmd);
 }
 
+uint8_t qpci_find_capability(QPCIDevice *dev, uint8_t id)
+{
+    uint8_t cap;
+    uint8_t addr = qpci_config_readb(dev, PCI_CAPABILITY_LIST);
+
+    do {
+        cap = qpci_config_readb(dev, addr);
+        if (cap != id) {
+            addr = qpci_config_readb(dev, addr + PCI_CAP_LIST_NEXT);
+        }
+    } while (cap != id && addr != 0);
+
+    return addr;
+}
+
+void qpci_msix_enable(QPCIDevice *dev)
+{
+    uint8_t addr;
+    uint16_t val;
+    uint32_t table;
+    uint8_t bir_table;
+    uint8_t bir_pba;
+    void *offset;
+
+    addr = qpci_find_capability(dev, PCI_CAP_ID_MSIX);
+    g_assert_cmphex(addr, !=, 0);
+
+    val = qpci_config_readw(dev, addr + PCI_MSIX_FLAGS);
+    qpci_config_writew(dev, addr + PCI_MSIX_FLAGS, val | PCI_MSIX_FLAGS_ENABLE);
+
+    table = qpci_config_readl(dev, addr + PCI_MSIX_TABLE);
+    bir_table = table & PCI_MSIX_FLAGS_BIRMASK;
+    offset = qpci_iomap(dev, bir_table, NULL);
+    dev->msix_table = offset + (table & ~PCI_MSIX_FLAGS_BIRMASK);
+
+    table = qpci_config_readl(dev, addr + PCI_MSIX_PBA);
+    bir_pba = table & PCI_MSIX_FLAGS_BIRMASK;
+    if (bir_pba != bir_table) {
+        offset = qpci_iomap(dev, bir_pba, NULL);
+    }
+    dev->msix_pba = offset + (table & ~PCI_MSIX_FLAGS_BIRMASK);
+
+    g_assert(dev->msix_table != NULL);
+    g_assert(dev->msix_pba != NULL);
+    dev->msix_enabled = true;
+}
+
+void qpci_msix_disable(QPCIDevice *dev)
+{
+    uint8_t addr;
+    uint16_t val;
+
+    g_assert(dev->msix_enabled);
+    addr = qpci_find_capability(dev, PCI_CAP_ID_MSIX);
+    g_assert_cmphex(addr, !=, 0);
+    val = qpci_config_readw(dev, addr + PCI_MSIX_FLAGS);
+    qpci_config_writew(dev, addr + PCI_MSIX_FLAGS,
+                                                val & ~PCI_MSIX_FLAGS_ENABLE);
+
+    qpci_iounmap(dev, dev->msix_table);
+    qpci_iounmap(dev, dev->msix_pba);
+    dev->msix_enabled = 0;
+    dev->msix_table = NULL;
+    dev->msix_pba = NULL;
+}
+
+bool qpci_msix_pending(QPCIDevice *dev, uint16_t entry)
+{
+    uint32_t pba_entry;
+    uint8_t bit_n = entry % 32;
+    void *addr = dev->msix_pba + (entry / 32) * PCI_MSIX_ENTRY_SIZE / 4;
+
+    g_assert(dev->msix_enabled);
+    pba_entry = qpci_io_readl(dev, addr);
+    qpci_io_writel(dev, addr, pba_entry & ~(1 << bit_n));
+    return (pba_entry & (1 << bit_n)) != 0;
+}
+
+bool qpci_msix_masked(QPCIDevice *dev, uint16_t entry)
+{
+    uint8_t addr;
+    uint16_t val;
+    void *vector_addr = dev->msix_table + (entry * PCI_MSIX_ENTRY_SIZE);
+
+    g_assert(dev->msix_enabled);
+    addr = qpci_find_capability(dev, PCI_CAP_ID_MSIX);
+    g_assert_cmphex(addr, !=, 0);
+    val = qpci_config_readw(dev, addr + PCI_MSIX_FLAGS);
+
+    if (val & PCI_MSIX_FLAGS_MASKALL) {
+        return true;
+    } else {
+        return (qpci_io_readl(dev, vector_addr + PCI_MSIX_ENTRY_VECTOR_CTRL)
+                                            & PCI_MSIX_ENTRY_CTRL_MASKBIT) != 0;
+    }
+}
+
+uint16_t qpci_msix_table_size(QPCIDevice *dev)
+{
+    uint8_t addr;
+    uint16_t control;
+
+    addr = qpci_find_capability(dev, PCI_CAP_ID_MSIX);
+    g_assert_cmphex(addr, !=, 0);
+
+    control = qpci_config_readw(dev, addr + PCI_MSIX_FLAGS);
+    return (control & PCI_MSIX_FLAGS_QSIZE) + 1;
+}
+
 uint8_t qpci_config_readb(QPCIDevice *dev, uint8_t offset)
 {
     return dev->bus->config_readb(dev->bus, dev->devfn, offset);
diff --git a/tests/libqos/pci.h b/tests/libqos/pci.h
index 9ee048b..d51eb9e 100644
--- a/tests/libqos/pci.h
+++ b/tests/libqos/pci.h
@@ -14,6 +14,7 @@
 #define LIBQOS_PCI_H
 
 #include <stdint.h>
+#include "libqtest.h"
 
 #define QPCI_DEVFN(dev, fn) (((dev) << 3) | (fn))
 
@@ -49,6 +50,9 @@ struct QPCIDevice
 {
     QPCIBus *bus;
     int devfn;
+    bool msix_enabled;
+    void *msix_table;
+    void *msix_pba;
 };
 
 void qpci_device_foreach(QPCIBus *bus, int vendor_id, int device_id,
@@ -57,6 +61,12 @@ void qpci_device_foreach(QPCIBus *bus, int vendor_id, int device_id,
 QPCIDevice *qpci_device_find(QPCIBus *bus, int devfn);
 
 void qpci_device_enable(QPCIDevice *dev);
+uint8_t qpci_find_capability(QPCIDevice *dev, uint8_t id);
+void qpci_msix_enable(QPCIDevice *dev);
+void qpci_msix_disable(QPCIDevice *dev);
+bool qpci_msix_pending(QPCIDevice *dev, uint16_t entry);
+bool qpci_msix_masked(QPCIDevice *dev, uint16_t entry);
+uint16_t qpci_msix_table_size(QPCIDevice *dev);
 
 uint8_t qpci_config_readb(QPCIDevice *dev, uint8_t offset);
 uint16_t qpci_config_readw(QPCIDevice *dev, uint8_t offset);
diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c
index cf15f7a..ab28717 100644
--- a/tests/libqos/virtio-pci.c
+++ b/tests/libqos/virtio-pci.c
@@ -36,6 +36,8 @@ static QVirtioPCIDevice *qpcidevice_to_qvirtiodevice(QPCIDevice *pdev)
                             qpci_config_readw(vpcidev->pdev, PCI_SUBSYSTEM_ID);
     }
 
+    vpcidev->config_msix_entry = -1;
+
     return vpcidev;
 }
 
@@ -125,10 +127,45 @@ static void qvirtio_pci_set_status(QVirtioDevice *d, uint8_t status)
     qpci_io_writeb(dev->pdev, dev->addr + QVIRTIO_DEVICE_STATUS, status);
 }
 
-static uint8_t qvirtio_pci_get_isr_status(QVirtioDevice *d)
+static bool qvirtio_pci_get_queue_isr_status(QVirtioDevice *d, QVirtQueue *vq)
 {
     QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d;
-    return qpci_io_readb(dev->pdev, dev->addr + QVIRTIO_ISR_STATUS);
+    QVirtQueuePCI *vqpci = (QVirtQueuePCI *)vq;
+    uint32_t data;
+
+    if (dev->pdev->msix_enabled) {
+        g_assert_cmpint(vqpci->msix_entry, !=, -1);
+        if (qpci_msix_masked(dev->pdev, vqpci->msix_entry)) {
+            /* No ISR checking should be done if masked, but read anyway */
+            return qpci_msix_pending(dev->pdev, vqpci->msix_entry);
+        } else {
+            data = readl(vqpci->msix_addr);
+            writel(vqpci->msix_addr, 0);
+            return data == vqpci->msix_data;
+        }
+    } else {
+        return qpci_io_readb(dev->pdev, dev->addr + QVIRTIO_ISR_STATUS) & 1;
+    }
+}
+
+static bool qvirtio_pci_get_config_isr_status(QVirtioDevice *d)
+{
+    QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d;
+    uint32_t data;
+
+    if (dev->pdev->msix_enabled) {
+        g_assert_cmpint(dev->config_msix_entry, !=, -1);
+        if (qpci_msix_masked(dev->pdev, dev->config_msix_entry)) {
+            /* No ISR checking should be done if masked, but read anyway */
+            return qpci_msix_pending(dev->pdev, dev->config_msix_entry);
+        } else {
+            data = readl(dev->config_msix_addr);
+            writel(dev->config_msix_addr, 0);
+            return data == dev->config_msix_data;
+        }
+    } else {
+        return qpci_io_readb(dev->pdev, dev->addr + QVIRTIO_ISR_STATUS) & 2;
+    }
 }
 
 static void qvirtio_pci_queue_select(QVirtioDevice *d, uint16_t index)
@@ -154,32 +191,34 @@ static QVirtQueue *qvirtio_pci_virtqueue_setup(QVirtioDevice *d,
 {
     uint32_t feat;
     uint64_t addr;
-    QVirtQueue *vq;
+    QVirtQueuePCI *vqpci;
 
-    vq = g_malloc0(sizeof(*vq));
+    vqpci = g_malloc0(sizeof(*vqpci));
     feat = qvirtio_pci_get_guest_features(d);
 
     qvirtio_pci_queue_select(d, index);
-    vq->index = index;
-    vq->size = qvirtio_pci_get_queue_size(d);
-    vq->free_head = 0;
-    vq->num_free = vq->size;
-    vq->align = QVIRTIO_PCI_ALIGN;
-    vq->indirect = (feat & QVIRTIO_F_RING_INDIRECT_DESC) != 0;
+    vqpci->vq.index = index;
+    vqpci->vq.size = qvirtio_pci_get_queue_size(d);
+    vqpci->vq.free_head = 0;
+    vqpci->vq.num_free = vqpci->vq.size;
+    vqpci->vq.align = QVIRTIO_PCI_ALIGN;
+    vqpci->vq.indirect = (feat & QVIRTIO_F_RING_INDIRECT_DESC) != 0;
+
+    vqpci->msix_entry = -1;
+    vqpci->msix_addr = 0;
+    vqpci->msix_data = 0x12345678;
 
     /* Check different than 0 */
-    g_assert_cmpint(vq->size, !=, 0);
+    g_assert_cmpint(vqpci->vq.size, !=, 0);
 
     /* Check power of 2 */
-    g_assert_cmpint(vq->size & (vq->size - 1), ==, 0);
-
-    addr = guest_alloc(alloc, qvring_size(vq->size, QVIRTIO_PCI_ALIGN));
-    qvring_init(alloc, vq, addr);
-    qvirtio_pci_set_queue_address(d, vq->desc / QVIRTIO_PCI_ALIGN);
+    g_assert_cmpint(vqpci->vq.size & (vqpci->vq.size - 1), ==, 0);
 
-    /* TODO: MSI-X configuration */
+    addr = guest_alloc(alloc, qvring_size(vqpci->vq.size, QVIRTIO_PCI_ALIGN));
+    qvring_init(alloc, &vqpci->vq, addr);
+    qvirtio_pci_set_queue_address(d, vqpci->vq.desc / QVIRTIO_PCI_ALIGN);
 
-    return vq;
+    return &vqpci->vq;
 }
 
 static void qvirtio_pci_virtqueue_kick(QVirtioDevice *d, QVirtQueue *vq)
@@ -198,7 +237,8 @@ const QVirtioBus qvirtio_pci = {
     .get_guest_features = qvirtio_pci_get_guest_features,
     .get_status = qvirtio_pci_get_status,
     .set_status = qvirtio_pci_set_status,
-    .get_isr_status = qvirtio_pci_get_isr_status,
+    .get_queue_isr_status = qvirtio_pci_get_queue_isr_status,
+    .get_config_isr_status = qvirtio_pci_get_config_isr_status,
     .queue_select = qvirtio_pci_queue_select,
     .get_queue_size = qvirtio_pci_get_queue_size,
     .set_queue_address = qvirtio_pci_set_queue_address,
@@ -235,4 +275,68 @@ void qvirtio_pci_device_enable(QVirtioPCIDevice *d)
 void qvirtio_pci_device_disable(QVirtioPCIDevice *d)
 {
     qpci_iounmap(d->pdev, d->addr);
+    d->addr = NULL;
+}
+
+void qvirtqueue_pci_msix_setup(QVirtioPCIDevice *d, QVirtQueuePCI *vqpci,
+                                        QGuestAllocator *alloc, uint16_t entry)
+{
+    uint16_t vector;
+    uint32_t control;
+    void *addr;
+
+    g_assert(d->pdev->msix_enabled);
+    addr = d->pdev->msix_table + (entry * 16);
+
+    g_assert_cmpint(entry, >=, 0);
+    g_assert_cmpint(entry, <, qpci_msix_table_size(d->pdev));
+    vqpci->msix_entry = entry;
+
+    vqpci->msix_addr = guest_alloc(alloc, 4);
+    qpci_io_writel(d->pdev, addr + PCI_MSIX_ENTRY_LOWER_ADDR,
+                                                    vqpci->msix_addr & ~0UL);
+    qpci_io_writel(d->pdev, addr + PCI_MSIX_ENTRY_UPPER_ADDR,
+                                            (vqpci->msix_addr >> 32) & ~0UL);
+    qpci_io_writel(d->pdev, addr + PCI_MSIX_ENTRY_DATA, vqpci->msix_data);
+
+    control = qpci_io_readl(d->pdev, addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
+    qpci_io_writel(d->pdev, addr + PCI_MSIX_ENTRY_VECTOR_CTRL,
+                                        control & ~PCI_MSIX_ENTRY_CTRL_MASKBIT);
+
+    qvirtio_pci_queue_select(&d->vdev, vqpci->vq.index);
+    qpci_io_writew(d->pdev, d->addr + QVIRTIO_MSIX_QUEUE_VECTOR, entry);
+    vector = qpci_io_readw(d->pdev, d->addr + QVIRTIO_MSIX_QUEUE_VECTOR);
+    g_assert_cmphex(vector, !=, QVIRTIO_MSI_NO_VECTOR);
+}
+
+void qvirtio_pci_set_msix_configuration_vector(QVirtioPCIDevice *d,
+                                        QGuestAllocator *alloc, uint16_t entry)
+{
+    uint16_t vector;
+    uint32_t control;
+    void *addr;
+
+    g_assert(d->pdev->msix_enabled);
+    addr = d->pdev->msix_table + (entry * 16);
+
+    g_assert_cmpint(entry, >=, 0);
+    g_assert_cmpint(entry, <, qpci_msix_table_size(d->pdev));
+    d->config_msix_entry = entry;
+
+    d->config_msix_data = 0x12345678;
+    d->config_msix_addr = guest_alloc(alloc, 4);
+
+    qpci_io_writel(d->pdev, addr + PCI_MSIX_ENTRY_LOWER_ADDR,
+                                                    d->config_msix_addr & ~0UL);
+    qpci_io_writel(d->pdev, addr + PCI_MSIX_ENTRY_UPPER_ADDR,
+                                            (d->config_msix_addr >> 32) & ~0UL);
+    qpci_io_writel(d->pdev, addr + PCI_MSIX_ENTRY_DATA, d->config_msix_data);
+
+    control = qpci_io_readl(d->pdev, addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
+    qpci_io_writel(d->pdev, addr + PCI_MSIX_ENTRY_VECTOR_CTRL,
+                                        control & ~PCI_MSIX_ENTRY_CTRL_MASKBIT);
+
+    qpci_io_writew(d->pdev, d->addr + QVIRTIO_MSIX_CONF_VECTOR, entry);
+    vector = qpci_io_readw(d->pdev, d->addr + QVIRTIO_MSIX_CONF_VECTOR);
+    g_assert_cmphex(vector, !=, QVIRTIO_MSI_NO_VECTOR);
 }
diff --git a/tests/libqos/virtio-pci.h b/tests/libqos/virtio-pci.h
index 40bd12d..883f7ff 100644
--- a/tests/libqos/virtio-pci.h
+++ b/tests/libqos/virtio-pci.h
@@ -28,12 +28,24 @@
 
 #define QVIRTIO_PCI_ALIGN   4096
 
+#define QVIRTIO_MSI_NO_VECTOR   0xFFFF
+
 typedef struct QVirtioPCIDevice {
     QVirtioDevice vdev;
     QPCIDevice *pdev;
     void *addr;
+    uint16_t config_msix_entry;
+    uint64_t config_msix_addr;
+    uint32_t config_msix_data;
 } QVirtioPCIDevice;
 
+typedef struct QVirtQueuePCI {
+    QVirtQueue vq;
+    uint16_t msix_entry;
+    uint64_t msix_addr;
+    uint32_t msix_data;
+} QVirtQueuePCI;
+
 extern const QVirtioBus qvirtio_pci;
 
 void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type,
@@ -41,4 +53,9 @@ void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type,
 QVirtioPCIDevice *qvirtio_pci_device_find(QPCIBus *bus, uint16_t device_type);
 void qvirtio_pci_device_enable(QVirtioPCIDevice *d);
 void qvirtio_pci_device_disable(QVirtioPCIDevice *d);
+
+void qvirtio_pci_set_msix_configuration_vector(QVirtioPCIDevice *d,
+                                        QGuestAllocator *alloc, uint16_t entry);
+void qvirtqueue_pci_msix_setup(QVirtioPCIDevice *d, QVirtQueuePCI *vqpci,
+                                        QGuestAllocator *alloc, uint16_t entry);
 #endif
diff --git a/tests/libqos/virtio.c b/tests/libqos/virtio.c
index b1cab1f..16eaf79 100644
--- a/tests/libqos/virtio.c
+++ b/tests/libqos/virtio.c
@@ -78,12 +78,25 @@ void qvirtio_set_driver_ok(const QVirtioBus *bus, QVirtioDevice *d)
                 QVIRTIO_DRIVER_OK | QVIRTIO_DRIVER | QVIRTIO_ACKNOWLEDGE);
 }
 
-bool qvirtio_wait_isr(const QVirtioBus *bus, QVirtioDevice *d, uint8_t mask,
+bool qvirtio_wait_queue_isr(const QVirtioBus *bus, QVirtioDevice *d,
+                                            QVirtQueue *vq, uint64_t timeout)
+{
+    do {
+        clock_step(10);
+        if (bus->get_queue_isr_status(d, vq)) {
+            break; /* It has ended */
+        }
+    } while (--timeout);
+
+    return timeout != 0;
+}
+
+bool qvirtio_wait_config_isr(const QVirtioBus *bus, QVirtioDevice *d,
                                                             uint64_t timeout)
 {
     do {
         clock_step(10);
-        if (bus->get_isr_status(d) & mask) {
+        if (bus->get_config_isr_status(d)) {
             break; /* It has ended */
         }
     } while (--timeout);
diff --git a/tests/libqos/virtio.h b/tests/libqos/virtio.h
index 1860660..cebccd2 100644
--- a/tests/libqos/virtio.h
+++ b/tests/libqos/virtio.h
@@ -109,8 +109,11 @@ typedef struct QVirtioBus {
     /* Set status of the device  */
     void (*set_status)(QVirtioDevice *d, uint8_t status);
 
-    /* Get the ISR status of the device */
-    uint8_t (*get_isr_status)(QVirtioDevice *d);
+    /* Get the queue ISR status of the device */
+    bool (*get_queue_isr_status)(QVirtioDevice *d, QVirtQueue *vq);
+
+    /* Get the configuration ISR status of the device */
+    bool (*get_config_isr_status)(QVirtioDevice *d);
 
     /* Select a queue to work on */
     void (*queue_select)(QVirtioDevice *d, uint16_t index);
@@ -153,7 +156,9 @@ void qvirtio_set_acknowledge(const QVirtioBus *bus, QVirtioDevice *d);
 void qvirtio_set_driver(const QVirtioBus *bus, QVirtioDevice *d);
 void qvirtio_set_driver_ok(const QVirtioBus *bus, QVirtioDevice *d);
 
-bool qvirtio_wait_isr(const QVirtioBus *bus, QVirtioDevice *d, uint8_t mask,
+bool qvirtio_wait_queue_isr(const QVirtioBus *bus, QVirtioDevice *d,
+                                            QVirtQueue *vq, uint64_t timeout);
+bool qvirtio_wait_config_isr(const QVirtioBus *bus, QVirtioDevice *d,
                                                             uint64_t timeout);
 QVirtQueue *qvirtqueue_setup(const QVirtioBus *bus, QVirtioDevice *d,
                                         QGuestAllocator *alloc, uint16_t index);
diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
index 672580b..0100aaa 100644
--- a/tests/virtio-blk-test.c
+++ b/tests/virtio-blk-test.c
@@ -134,7 +134,7 @@ static void pci_basic(void)
 {
     QVirtioPCIDevice *dev;
     QPCIBus *bus;
-    QVirtQueue *vq;
+    QVirtQueuePCI *vqpci;
     QGuestAllocator *alloc;
     QVirtioBlkReq req;
     void *addr;
@@ -162,7 +162,8 @@ static void pci_basic(void)
     qvirtio_set_features(&qvirtio_pci, &dev->vdev, features);
 
     alloc = pc_alloc_init();
-    vq = qvirtqueue_setup(&qvirtio_pci, &dev->vdev, alloc, 0);
+    vqpci = (QVirtQueuePCI *)qvirtqueue_setup(&qvirtio_pci, &dev->vdev,
+                                                                    alloc, 0);
 
     qvirtio_set_driver_ok(&qvirtio_pci, &dev->vdev);
 
@@ -178,11 +179,11 @@ static void pci_basic(void)
 
     g_free(req.data);
 
-    free_head = qvirtqueue_add(vq, req_addr, 528, false, true);
-    qvirtqueue_add(vq, req_addr + 528, 1, true, false);
-    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, vq, free_head);
+    free_head = qvirtqueue_add(&vqpci->vq, req_addr, 528, false, true);
+    qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false);
+    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head);
 
-    g_assert(qvirtio_wait_isr(&qvirtio_pci, &dev->vdev, 0x1,
+    g_assert(qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq,
                                                         QVIRTIO_BLK_TIMEOUT));
     status = readb(req_addr + 528);
     g_assert_cmpint(status, ==, 0);
@@ -199,12 +200,12 @@ static void pci_basic(void)
 
     g_free(req.data);
 
-    free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
-    qvirtqueue_add(vq, req_addr + 16, 513, true, false);
+    free_head = qvirtqueue_add(&vqpci->vq, req_addr, 16, false, true);
+    qvirtqueue_add(&vqpci->vq, req_addr + 16, 513, true, false);
 
-    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, vq, free_head);
+    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head);
 
-    g_assert(qvirtio_wait_isr(&qvirtio_pci, &dev->vdev, 0x1,
+    g_assert(qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq,
                                                         QVIRTIO_BLK_TIMEOUT));
     status = readb(req_addr + 528);
     g_assert_cmpint(status, ==, 0);
@@ -226,15 +227,13 @@ static void pci_basic(void)
 
     req_addr = virtio_blk_request(alloc, &req, 512);
 
-    g_free(req.data);
-
-    free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
-    qvirtqueue_add(vq, req_addr + 16, 512, false, true);
-    qvirtqueue_add(vq, req_addr + 528, 1, true, false);
+    free_head = qvirtqueue_add(&vqpci->vq, req_addr, 16, false, true);
+    qvirtqueue_add(&vqpci->vq, req_addr + 16, 512, false, true);
+    qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false);
 
-    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, vq, free_head);
+    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head);
 
-    g_assert(qvirtio_wait_isr(&qvirtio_pci, &dev->vdev, 0x1,
+    g_assert(qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq,
                                                         QVIRTIO_BLK_TIMEOUT));
     status = readb(req_addr + 528);
     g_assert_cmpint(status, ==, 0);
@@ -251,19 +250,17 @@ static void pci_basic(void)
 
     g_free(req.data);
 
-    free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
-    qvirtqueue_add(vq, req_addr + 16, 512, true, true);
-    qvirtqueue_add(vq, req_addr + 528, 1, true, false);
+    free_head = qvirtqueue_add(&vqpci->vq, req_addr, 16, false, true);
+    qvirtqueue_add(&vqpci->vq, req_addr + 16, 512, true, true);
+    qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false);
 
-    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, vq, free_head);
+    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head);
 
-    g_assert(qvirtio_wait_isr(&qvirtio_pci, &dev->vdev, 0x1,
+    g_assert(qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq,
                                                         QVIRTIO_BLK_TIMEOUT));
     status = readb(req_addr + 528);
     g_assert_cmpint(status, ==, 0);
 
-    guest_free(alloc, req_addr);
-
     data = g_malloc0(512);
     memread(req_addr + 16, data, 512);
     g_assert_cmpstr(data, ==, "TEST");
@@ -272,7 +269,7 @@ static void pci_basic(void)
     guest_free(alloc, req_addr);
 
     /* End test */
-    guest_free(alloc, vq->desc);
+    guest_free(alloc, vqpci->vq.desc);
     qvirtio_pci_device_disable(dev);
     g_free(dev);
     test_end();
@@ -282,7 +279,7 @@ static void pci_indirect(void)
 {
     QVirtioPCIDevice *dev;
     QPCIBus *bus;
-    QVirtQueue *vq;
+    QVirtQueuePCI *vqpci;
     QGuestAllocator *alloc;
     QVirtioBlkReq req;
     QVRingIndirectDesc *indirect;
@@ -311,8 +308,8 @@ static void pci_indirect(void)
     qvirtio_set_features(&qvirtio_pci, &dev->vdev, features);
 
     alloc = pc_alloc_init();
-    vq = qvirtqueue_setup(&qvirtio_pci, &dev->vdev, alloc, 0);
-
+    vqpci = (QVirtQueuePCI *)qvirtqueue_setup(&qvirtio_pci, &dev->vdev,
+                                                                    alloc, 0);
     qvirtio_set_driver_ok(&qvirtio_pci, &dev->vdev);
 
     /* Write request */
@@ -329,10 +326,10 @@ static void pci_indirect(void)
     indirect = qvring_indirect_desc_setup(&dev->vdev, alloc, 2);
     qvring_indirect_desc_add(indirect, req_addr, 528, false);
     qvring_indirect_desc_add(indirect, req_addr + 528, 1, true);
-    free_head = qvirtqueue_add_indirect(vq, indirect);
-    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, vq, free_head);
+    free_head = qvirtqueue_add_indirect(&vqpci->vq, indirect);
+    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head);
 
-    g_assert(qvirtio_wait_isr(&qvirtio_pci, &dev->vdev, 0x1,
+    g_assert(qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq,
                                                         QVIRTIO_BLK_TIMEOUT));
     status = readb(req_addr + 528);
     g_assert_cmpint(status, ==, 0);
@@ -354,10 +351,10 @@ static void pci_indirect(void)
     indirect = qvring_indirect_desc_setup(&dev->vdev, alloc, 2);
     qvring_indirect_desc_add(indirect, req_addr, 16, false);
     qvring_indirect_desc_add(indirect, req_addr + 16, 513, true);
-    free_head = qvirtqueue_add_indirect(vq, indirect);
-    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, vq, free_head);
+    free_head = qvirtqueue_add_indirect(&vqpci->vq, indirect);
+    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head);
 
-    g_assert(qvirtio_wait_isr(&qvirtio_pci, &dev->vdev, 0x1,
+    g_assert(qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq,
                                                         QVIRTIO_BLK_TIMEOUT));
     status = readb(req_addr + 528);
     g_assert_cmpint(status, ==, 0);
@@ -371,7 +368,7 @@ static void pci_indirect(void)
     guest_free(alloc, req_addr);
 
     /* End test */
-    guest_free(alloc, vq->desc);
+    guest_free(alloc, vqpci->vq.desc);
     qvirtio_pci_device_disable(dev);
     g_free(dev);
     test_end();
@@ -399,7 +396,7 @@ static void pci_config(void)
 
     qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', "
                                                     " 'size': %d } }", n_size);
-    g_assert(qvirtio_wait_isr(&qvirtio_pci, &dev->vdev, 0x2,
+    g_assert(qvirtio_wait_config_isr(&qvirtio_pci, &dev->vdev,
                                                         QVIRTIO_BLK_TIMEOUT));
 
     capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, addr);
@@ -410,6 +407,116 @@ static void pci_config(void)
     test_end();
 }
 
+static void pci_msix(void)
+{
+    QVirtioPCIDevice *dev;
+    QPCIBus *bus;
+    QVirtQueuePCI *vqpci;
+    QGuestAllocator *alloc;
+    QVirtioBlkReq req;
+    int n_size = TEST_IMAGE_SIZE / 2;
+    void *addr;
+    uint64_t req_addr;
+    uint64_t capacity;
+    uint32_t features;
+    uint32_t free_head;
+    uint8_t status;
+    char *data;
+
+    bus = test_start();
+    alloc = pc_alloc_init();
+
+    dev = virtio_blk_init(bus);
+    qpci_msix_enable(dev->pdev);
+
+    qvirtio_pci_set_msix_configuration_vector(dev, alloc, 0);
+
+    /* MSI-X is enabled */
+    addr = dev->addr + QVIRTIO_DEVICE_SPECIFIC_MSIX;
+
+    capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, addr);
+    g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512);
+
+    features = qvirtio_get_features(&qvirtio_pci, &dev->vdev);
+    features = features & ~(QVIRTIO_F_BAD_FEATURE |
+                            QVIRTIO_F_RING_INDIRECT_DESC |
+                            QVIRTIO_F_RING_EVENT_IDX | QVIRTIO_BLK_F_SCSI);
+    qvirtio_set_features(&qvirtio_pci, &dev->vdev, features);
+
+    vqpci = (QVirtQueuePCI *)qvirtqueue_setup(&qvirtio_pci, &dev->vdev,
+                                                                    alloc, 0);
+    qvirtqueue_pci_msix_setup(dev, vqpci, alloc, 1);
+
+    qvirtio_set_driver_ok(&qvirtio_pci, &dev->vdev);
+
+    qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', "
+                                                    " 'size': %d } }", n_size);
+
+    g_assert(qvirtio_wait_config_isr(&qvirtio_pci, &dev->vdev,
+                                                        QVIRTIO_BLK_TIMEOUT));
+
+    capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, addr);
+    g_assert_cmpint(capacity, ==, n_size / 512);
+
+    /* Write request */
+    req.type = QVIRTIO_BLK_T_OUT;
+    req.ioprio = 1;
+    req.sector = 0;
+    req.data = g_malloc0(512);
+    strcpy(req.data, "TEST");
+
+    req_addr = virtio_blk_request(alloc, &req, 512);
+
+    g_free(req.data);
+
+    free_head = qvirtqueue_add(&vqpci->vq, req_addr, 528, false, true);
+    qvirtqueue_add(&vqpci->vq, req_addr + 528, 1, true, false);
+    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head);
+
+    g_assert(qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq,
+                                                        QVIRTIO_BLK_TIMEOUT));
+
+    status = readb(req_addr + 528);
+    g_assert_cmpint(status, ==, 0);
+
+    guest_free(alloc, req_addr);
+
+    /* Read request */
+    req.type = QVIRTIO_BLK_T_IN;
+    req.ioprio = 1;
+    req.sector = 0;
+    req.data = g_malloc0(512);
+
+    req_addr = virtio_blk_request(alloc, &req, 512);
+
+    g_free(req.data);
+
+    free_head = qvirtqueue_add(&vqpci->vq, req_addr, 16, false, true);
+    qvirtqueue_add(&vqpci->vq, req_addr + 16, 513, true, false);
+
+    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, &vqpci->vq, free_head);
+
+    g_assert(qvirtio_wait_queue_isr(&qvirtio_pci, &dev->vdev, &vqpci->vq,
+                                                        QVIRTIO_BLK_TIMEOUT));
+
+    status = readb(req_addr + 528);
+    g_assert_cmpint(status, ==, 0);
+
+    data = g_malloc0(512);
+    memread(req_addr + 16, data, 512);
+    g_assert_cmpstr(data, ==, "TEST");
+    g_free(data);
+
+    guest_free(alloc, req_addr);
+
+    /* End test */
+    guest_free(alloc, vqpci->vq.desc);
+    qpci_msix_disable(dev->pdev);
+    qvirtio_pci_device_disable(dev);
+    g_free(dev);
+    test_end();
+}
+
 int main(int argc, char **argv)
 {
     int ret;
@@ -419,6 +526,7 @@ int main(int argc, char **argv)
     g_test_add_func("/virtio/blk/pci/basic", pci_basic);
     g_test_add_func("/virtio/blk/pci/indirect", pci_indirect);
     g_test_add_func("/virtio/blk/pci/config", pci_config);
+    g_test_add_func("/virtio/blk/pci/msix", pci_msix);
 
     ret = g_test_run();
 
commit e11199554c568822c3ede3e3b4820ac3a146b1d9
Author: Marc MarÃ <marc.mari.barcelo at gmail.com>
Date:   Mon Sep 1 12:07:58 2014 +0200

    libqos: Added test case for configuration changes in virtio-blk test
    
    Reviewed-by: Stefan Hajnoczi <stefanha at redhat.com>
    Signed-off-by: Marc MarÃ <marc.mari.barcelo at gmail.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
index 2f9cc2b..672580b 100644
--- a/tests/virtio-blk-test.c
+++ b/tests/virtio-blk-test.c
@@ -377,6 +377,39 @@ static void pci_indirect(void)
     test_end();
 }
 
+static void pci_config(void)
+{
+    QVirtioPCIDevice *dev;
+    QPCIBus *bus;
+    int n_size = TEST_IMAGE_SIZE / 2;
+    void *addr;
+    uint64_t capacity;
+
+    bus = test_start();
+
+    dev = virtio_blk_init(bus);
+
+    /* MSI-X is not enabled */
+    addr = dev->addr + QVIRTIO_DEVICE_SPECIFIC_NO_MSIX;
+
+    capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, addr);
+    g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512);
+
+    qvirtio_set_driver_ok(&qvirtio_pci, &dev->vdev);
+
+    qmp("{ 'execute': 'block_resize', 'arguments': { 'device': 'drive0', "
+                                                    " 'size': %d } }", n_size);
+    g_assert(qvirtio_wait_isr(&qvirtio_pci, &dev->vdev, 0x2,
+                                                        QVIRTIO_BLK_TIMEOUT));
+
+    capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, addr);
+    g_assert_cmpint(capacity, ==, n_size / 512);
+
+    qvirtio_pci_device_disable(dev);
+    g_free(dev);
+    test_end();
+}
+
 int main(int argc, char **argv)
 {
     int ret;
@@ -385,6 +418,7 @@ int main(int argc, char **argv)
 
     g_test_add_func("/virtio/blk/pci/basic", pci_basic);
     g_test_add_func("/virtio/blk/pci/indirect", pci_indirect);
+    g_test_add_func("/virtio/blk/pci/config", pci_config);
 
     ret = g_test_run();
 
commit f294b029aa2beb1c67116e04bff5d331f0b18288
Author: Marc MarÃ <marc.mari.barcelo at gmail.com>
Date:   Mon Sep 1 12:07:57 2014 +0200

    libqos: Added indirect descriptor support to virtio implementation
    
    Add functions necessary for working with indirect descriptors.
    Add test using new functions.
    
    Reviewed-by: Stefan Hajnoczi <stefanha at redhat.com>
    Signed-off-by: Marc MarÃ <marc.mari.barcelo at gmail.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c
index 12b06a2..cf15f7a 100644
--- a/tests/libqos/virtio-pci.c
+++ b/tests/libqos/virtio-pci.c
@@ -107,6 +107,12 @@ static void qvirtio_pci_set_features(QVirtioDevice *d, uint32_t features)
     qpci_io_writel(dev->pdev, dev->addr + QVIRTIO_GUEST_FEATURES, features);
 }
 
+static uint32_t qvirtio_pci_get_guest_features(QVirtioDevice *d)
+{
+    QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d;
+    return qpci_io_readl(dev->pdev, dev->addr + QVIRTIO_GUEST_FEATURES);
+}
+
 static uint8_t qvirtio_pci_get_status(QVirtioDevice *d)
 {
     QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d;
@@ -146,10 +152,12 @@ static void qvirtio_pci_set_queue_address(QVirtioDevice *d, uint32_t pfn)
 static QVirtQueue *qvirtio_pci_virtqueue_setup(QVirtioDevice *d,
                                         QGuestAllocator *alloc, uint16_t index)
 {
+    uint32_t feat;
     uint64_t addr;
     QVirtQueue *vq;
 
     vq = g_malloc0(sizeof(*vq));
+    feat = qvirtio_pci_get_guest_features(d);
 
     qvirtio_pci_queue_select(d, index);
     vq->index = index;
@@ -157,6 +165,7 @@ static QVirtQueue *qvirtio_pci_virtqueue_setup(QVirtioDevice *d,
     vq->free_head = 0;
     vq->num_free = vq->size;
     vq->align = QVIRTIO_PCI_ALIGN;
+    vq->indirect = (feat & QVIRTIO_F_RING_INDIRECT_DESC) != 0;
 
     /* Check different than 0 */
     g_assert_cmpint(vq->size, !=, 0);
@@ -186,6 +195,7 @@ const QVirtioBus qvirtio_pci = {
     .config_readq = qvirtio_pci_config_readq,
     .get_features = qvirtio_pci_get_features,
     .set_features = qvirtio_pci_set_features,
+    .get_guest_features = qvirtio_pci_get_guest_features,
     .get_status = qvirtio_pci_get_status,
     .set_status = qvirtio_pci_set_status,
     .get_isr_status = qvirtio_pci_get_isr_status,
diff --git a/tests/libqos/virtio.c b/tests/libqos/virtio.c
index de92642..b1cab1f 100644
--- a/tests/libqos/virtio.c
+++ b/tests/libqos/virtio.c
@@ -116,6 +116,51 @@ void qvring_init(const QGuestAllocator *alloc, QVirtQueue *vq, uint64_t addr)
     writew(vq->used, 0);
 }
 
+QVRingIndirectDesc *qvring_indirect_desc_setup(QVirtioDevice *d,
+                                        QGuestAllocator *alloc, uint16_t elem)
+{
+    int i;
+    QVRingIndirectDesc *indirect = g_malloc(sizeof(*indirect));
+
+    indirect->index = 0;
+    indirect->elem = elem;
+    indirect->desc = guest_alloc(alloc, sizeof(QVRingDesc)*elem);
+
+    for (i = 0; i < elem - 1; ++i) {
+        /* indirect->desc[i].addr */
+        writeq(indirect->desc + (16 * i), 0);
+        /* indirect->desc[i].flags */
+        writew(indirect->desc + (16 * i) + 12, QVRING_DESC_F_NEXT);
+        /* indirect->desc[i].next */
+        writew(indirect->desc + (16 * i) + 14, i + 1);
+    }
+
+    return indirect;
+}
+
+void qvring_indirect_desc_add(QVRingIndirectDesc *indirect, uint64_t data,
+                                                    uint32_t len, bool write)
+{
+    uint16_t flags;
+
+    g_assert_cmpint(indirect->index, <, indirect->elem);
+
+    flags = readw(indirect->desc + (16 * indirect->index) + 12);
+
+    if (write) {
+        flags |= QVRING_DESC_F_WRITE;
+    }
+
+    /* indirect->desc[indirect->index].addr */
+    writeq(indirect->desc + (16 * indirect->index), data);
+    /* indirect->desc[indirect->index].len */
+    writel(indirect->desc + (16 * indirect->index) + 8, len);
+    /* indirect->desc[indirect->index].flags */
+    writew(indirect->desc + (16 * indirect->index) + 12, flags);
+
+    indirect->index++;
+}
+
 uint32_t qvirtqueue_add(QVirtQueue *vq, uint64_t data, uint32_t len, bool write,
                                                                     bool next)
 {
@@ -140,6 +185,25 @@ uint32_t qvirtqueue_add(QVirtQueue *vq, uint64_t data, uint32_t len, bool write,
     return vq->free_head++; /* Return and increase, in this order */
 }
 
+uint32_t qvirtqueue_add_indirect(QVirtQueue *vq, QVRingIndirectDesc *indirect)
+{
+    g_assert(vq->indirect);
+    g_assert_cmpint(vq->size, >=, indirect->elem);
+    g_assert_cmpint(indirect->index, ==, indirect->elem);
+
+    vq->num_free--;
+
+    /* vq->desc[vq->free_head].addr */
+    writeq(vq->desc + (16 * vq->free_head), indirect->desc);
+    /* vq->desc[vq->free_head].len */
+    writel(vq->desc + (16 * vq->free_head) + 8,
+                                        sizeof(QVRingDesc) * indirect->elem);
+    /* vq->desc[vq->free_head].flags */
+    writew(vq->desc + (16 * vq->free_head) + 12, QVRING_DESC_F_INDIRECT);
+
+    return vq->free_head++; /* Return and increase, in this order */
+}
+
 void qvirtqueue_kick(const QVirtioBus *bus, QVirtioDevice *d, QVirtQueue *vq,
                                                             uint32_t free_head)
 {
diff --git a/tests/libqos/virtio.h b/tests/libqos/virtio.h
index aba5a1e..1860660 100644
--- a/tests/libqos/virtio.h
+++ b/tests/libqos/virtio.h
@@ -22,6 +22,11 @@
 #define QVIRTIO_NET_DEVICE_ID   0x1
 #define QVIRTIO_BLK_DEVICE_ID   0x2
 
+#define QVIRTIO_F_NOTIFY_ON_EMPTY       0x01000000
+#define QVIRTIO_F_ANY_LAYOUT            0x08000000
+#define QVIRTIO_F_RING_INDIRECT_DESC    0x10000000
+#define QVIRTIO_F_RING_EVENT_IDX        0x20000000
+
 #define QVRING_DESC_F_NEXT      0x1
 #define QVRING_DESC_F_WRITE     0x2
 #define QVRING_DESC_F_INDIRECT  0x4
@@ -74,8 +79,15 @@ typedef struct QVirtQueue {
     uint32_t free_head;
     uint32_t num_free;
     uint32_t align;
+    bool indirect;
 } QVirtQueue;
 
+typedef struct QVRingIndirectDesc {
+    uint64_t desc; /* This points to an array fo QVRingDesc */
+    uint16_t index;
+    uint16_t elem;
+} QVRingIndirectDesc;
+
 typedef struct QVirtioBus {
     uint8_t (*config_readb)(QVirtioDevice *d, void *addr);
     uint16_t (*config_readw)(QVirtioDevice *d, void *addr);
@@ -85,9 +97,12 @@ typedef struct QVirtioBus {
     /* Get features of the device */
     uint32_t (*get_features)(QVirtioDevice *d);
 
-    /* Get features of the device */
+    /* Set features of the device */
     void (*set_features)(QVirtioDevice *d, uint32_t features);
 
+    /* Get features of the guest */
+    uint32_t (*get_guest_features)(QVirtioDevice *d);
+
     /* Get status of the device */
     uint8_t (*get_status)(QVirtioDevice *d);
 
@@ -144,8 +159,13 @@ QVirtQueue *qvirtqueue_setup(const QVirtioBus *bus, QVirtioDevice *d,
                                         QGuestAllocator *alloc, uint16_t index);
 
 void qvring_init(const QGuestAllocator *alloc, QVirtQueue *vq, uint64_t addr);
+QVRingIndirectDesc *qvring_indirect_desc_setup(QVirtioDevice *d,
+                                        QGuestAllocator *alloc, uint16_t elem);
+void qvring_indirect_desc_add(QVRingIndirectDesc *indirect, uint64_t data,
+                                                    uint32_t len, bool write);
 uint32_t qvirtqueue_add(QVirtQueue *vq, uint64_t data, uint32_t len, bool write,
                                                                     bool next);
+uint32_t qvirtqueue_add_indirect(QVirtQueue *vq, QVRingIndirectDesc *indirect);
 void qvirtqueue_kick(const QVirtioBus *bus, QVirtioDevice *d, QVirtQueue *vq,
                                                             uint32_t free_head);
 
diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
index b048938..2f9cc2b 100644
--- a/tests/virtio-blk-test.c
+++ b/tests/virtio-blk-test.c
@@ -278,6 +278,105 @@ static void pci_basic(void)
     test_end();
 }
 
+static void pci_indirect(void)
+{
+    QVirtioPCIDevice *dev;
+    QPCIBus *bus;
+    QVirtQueue *vq;
+    QGuestAllocator *alloc;
+    QVirtioBlkReq req;
+    QVRingIndirectDesc *indirect;
+    void *addr;
+    uint64_t req_addr;
+    uint64_t capacity;
+    uint32_t features;
+    uint32_t free_head;
+    uint8_t status;
+    char *data;
+
+    bus = test_start();
+
+    dev = virtio_blk_init(bus);
+
+    /* MSI-X is not enabled */
+    addr = dev->addr + QVIRTIO_DEVICE_SPECIFIC_NO_MSIX;
+
+    capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, addr);
+    g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512);
+
+    features = qvirtio_get_features(&qvirtio_pci, &dev->vdev);
+    g_assert_cmphex(features & QVIRTIO_F_RING_INDIRECT_DESC, !=, 0);
+    features = features & ~(QVIRTIO_F_BAD_FEATURE | QVIRTIO_F_RING_EVENT_IDX |
+                                                            QVIRTIO_BLK_F_SCSI);
+    qvirtio_set_features(&qvirtio_pci, &dev->vdev, features);
+
+    alloc = pc_alloc_init();
+    vq = qvirtqueue_setup(&qvirtio_pci, &dev->vdev, alloc, 0);
+
+    qvirtio_set_driver_ok(&qvirtio_pci, &dev->vdev);
+
+    /* Write request */
+    req.type = QVIRTIO_BLK_T_OUT;
+    req.ioprio = 1;
+    req.sector = 0;
+    req.data = g_malloc0(512);
+    strcpy(req.data, "TEST");
+
+    req_addr = virtio_blk_request(alloc, &req, 512);
+
+    g_free(req.data);
+
+    indirect = qvring_indirect_desc_setup(&dev->vdev, alloc, 2);
+    qvring_indirect_desc_add(indirect, req_addr, 528, false);
+    qvring_indirect_desc_add(indirect, req_addr + 528, 1, true);
+    free_head = qvirtqueue_add_indirect(vq, indirect);
+    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, vq, free_head);
+
+    g_assert(qvirtio_wait_isr(&qvirtio_pci, &dev->vdev, 0x1,
+                                                        QVIRTIO_BLK_TIMEOUT));
+    status = readb(req_addr + 528);
+    g_assert_cmpint(status, ==, 0);
+
+    g_free(indirect);
+    guest_free(alloc, req_addr);
+
+    /* Read request */
+    req.type = QVIRTIO_BLK_T_IN;
+    req.ioprio = 1;
+    req.sector = 0;
+    req.data = g_malloc0(512);
+    strcpy(req.data, "TEST");
+
+    req_addr = virtio_blk_request(alloc, &req, 512);
+
+    g_free(req.data);
+
+    indirect = qvring_indirect_desc_setup(&dev->vdev, alloc, 2);
+    qvring_indirect_desc_add(indirect, req_addr, 16, false);
+    qvring_indirect_desc_add(indirect, req_addr + 16, 513, true);
+    free_head = qvirtqueue_add_indirect(vq, indirect);
+    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, vq, free_head);
+
+    g_assert(qvirtio_wait_isr(&qvirtio_pci, &dev->vdev, 0x1,
+                                                        QVIRTIO_BLK_TIMEOUT));
+    status = readb(req_addr + 528);
+    g_assert_cmpint(status, ==, 0);
+
+    data = g_malloc0(512);
+    memread(req_addr + 16, data, 512);
+    g_assert_cmpstr(data, ==, "TEST");
+    g_free(data);
+
+    g_free(indirect);
+    guest_free(alloc, req_addr);
+
+    /* End test */
+    guest_free(alloc, vq->desc);
+    qvirtio_pci_device_disable(dev);
+    g_free(dev);
+    test_end();
+}
+
 int main(int argc, char **argv)
 {
     int ret;
@@ -285,6 +384,7 @@ int main(int argc, char **argv)
     g_test_init(&argc, &argv, NULL);
 
     g_test_add_func("/virtio/blk/pci/basic", pci_basic);
+    g_test_add_func("/virtio/blk/pci/indirect", pci_indirect);
 
     ret = g_test_run();
 
commit bf3c63d2010c5ef52f8b988bee2a1486a056795f
Author: Marc MarÃ <marc.mari.barcelo at gmail.com>
Date:   Mon Sep 1 12:07:56 2014 +0200

    libqos: Added basic virtqueue support to virtio implementation
    
    Add status changing and feature negotiation.
    Add basic virtqueue support for adding and sending virtqueue requests.
    Add ISR checking.
    
    [Squashed request endianness fix by Greg Kurz <gkurz at linux.vnet.ibm.com>
    --Stefan]
    
    Reviewed-by: Stefan Hajnoczi <stefanha at redhat.com>
    Signed-off-by: Marc MarÃ <marc.mari.barcelo at gmail.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c
index 1a37620..12b06a2 100644
--- a/tests/libqos/virtio-pci.c
+++ b/tests/libqos/virtio-pci.c
@@ -14,6 +14,8 @@
 #include "libqos/virtio-pci.h"
 #include "libqos/pci.h"
 #include "libqos/pci-pc.h"
+#include "libqos/malloc.h"
+#include "libqos/malloc-pc.h"
 
 #include "hw/pci/pci_regs.h"
 
@@ -93,6 +95,18 @@ static uint64_t qvirtio_pci_config_readq(QVirtioDevice *d, void *addr)
     return u64;
 }
 
+static uint32_t qvirtio_pci_get_features(QVirtioDevice *d)
+{
+    QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d;
+    return qpci_io_readl(dev->pdev, dev->addr + QVIRTIO_DEVICE_FEATURES);
+}
+
+static void qvirtio_pci_set_features(QVirtioDevice *d, uint32_t features)
+{
+    QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d;
+    qpci_io_writel(dev->pdev, dev->addr + QVIRTIO_GUEST_FEATURES, features);
+}
+
 static uint8_t qvirtio_pci_get_status(QVirtioDevice *d)
 {
     QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d;
@@ -105,13 +119,81 @@ static void qvirtio_pci_set_status(QVirtioDevice *d, uint8_t status)
     qpci_io_writeb(dev->pdev, dev->addr + QVIRTIO_DEVICE_STATUS, status);
 }
 
+static uint8_t qvirtio_pci_get_isr_status(QVirtioDevice *d)
+{
+    QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d;
+    return qpci_io_readb(dev->pdev, dev->addr + QVIRTIO_ISR_STATUS);
+}
+
+static void qvirtio_pci_queue_select(QVirtioDevice *d, uint16_t index)
+{
+    QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d;
+    qpci_io_writeb(dev->pdev, dev->addr + QVIRTIO_QUEUE_SELECT, index);
+}
+
+static uint16_t qvirtio_pci_get_queue_size(QVirtioDevice *d)
+{
+    QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d;
+    return qpci_io_readw(dev->pdev, dev->addr + QVIRTIO_QUEUE_SIZE);
+}
+
+static void qvirtio_pci_set_queue_address(QVirtioDevice *d, uint32_t pfn)
+{
+    QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d;
+    qpci_io_writel(dev->pdev, dev->addr + QVIRTIO_QUEUE_ADDRESS, pfn);
+}
+
+static QVirtQueue *qvirtio_pci_virtqueue_setup(QVirtioDevice *d,
+                                        QGuestAllocator *alloc, uint16_t index)
+{
+    uint64_t addr;
+    QVirtQueue *vq;
+
+    vq = g_malloc0(sizeof(*vq));
+
+    qvirtio_pci_queue_select(d, index);
+    vq->index = index;
+    vq->size = qvirtio_pci_get_queue_size(d);
+    vq->free_head = 0;
+    vq->num_free = vq->size;
+    vq->align = QVIRTIO_PCI_ALIGN;
+
+    /* Check different than 0 */
+    g_assert_cmpint(vq->size, !=, 0);
+
+    /* Check power of 2 */
+    g_assert_cmpint(vq->size & (vq->size - 1), ==, 0);
+
+    addr = guest_alloc(alloc, qvring_size(vq->size, QVIRTIO_PCI_ALIGN));
+    qvring_init(alloc, vq, addr);
+    qvirtio_pci_set_queue_address(d, vq->desc / QVIRTIO_PCI_ALIGN);
+
+    /* TODO: MSI-X configuration */
+
+    return vq;
+}
+
+static void qvirtio_pci_virtqueue_kick(QVirtioDevice *d, QVirtQueue *vq)
+{
+    QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d;
+    qpci_io_writew(dev->pdev, dev->addr + QVIRTIO_QUEUE_NOTIFY, vq->index);
+}
+
 const QVirtioBus qvirtio_pci = {
     .config_readb = qvirtio_pci_config_readb,
     .config_readw = qvirtio_pci_config_readw,
     .config_readl = qvirtio_pci_config_readl,
     .config_readq = qvirtio_pci_config_readq,
+    .get_features = qvirtio_pci_get_features,
+    .set_features = qvirtio_pci_set_features,
     .get_status = qvirtio_pci_get_status,
     .set_status = qvirtio_pci_set_status,
+    .get_isr_status = qvirtio_pci_get_isr_status,
+    .queue_select = qvirtio_pci_queue_select,
+    .get_queue_size = qvirtio_pci_get_queue_size,
+    .set_queue_address = qvirtio_pci_set_queue_address,
+    .virtqueue_setup = qvirtio_pci_virtqueue_setup,
+    .virtqueue_kick = qvirtio_pci_virtqueue_kick,
 };
 
 void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type,
diff --git a/tests/libqos/virtio-pci.h b/tests/libqos/virtio-pci.h
index 26f902e..40bd12d 100644
--- a/tests/libqos/virtio-pci.h
+++ b/tests/libqos/virtio-pci.h
@@ -26,6 +26,8 @@
 #define QVIRTIO_DEVICE_SPECIFIC_MSIX    0x18
 #define QVIRTIO_DEVICE_SPECIFIC_NO_MSIX 0x14
 
+#define QVIRTIO_PCI_ALIGN   4096
+
 typedef struct QVirtioPCIDevice {
     QVirtioDevice vdev;
     QPCIDevice *pdev;
diff --git a/tests/libqos/virtio.c b/tests/libqos/virtio.c
index 577d679..de92642 100644
--- a/tests/libqos/virtio.c
+++ b/tests/libqos/virtio.c
@@ -35,6 +35,23 @@ uint64_t qvirtio_config_readq(const QVirtioBus *bus, QVirtioDevice *d,
     return bus->config_readq(d, addr);
 }
 
+uint32_t qvirtio_get_features(const QVirtioBus *bus, QVirtioDevice *d)
+{
+    return bus->get_features(d);
+}
+
+void qvirtio_set_features(const QVirtioBus *bus, QVirtioDevice *d,
+                                                            uint32_t features)
+{
+    bus->set_features(d, features);
+}
+
+QVirtQueue *qvirtqueue_setup(const QVirtioBus *bus, QVirtioDevice *d,
+                                        QGuestAllocator *alloc, uint16_t index)
+{
+    return bus->virtqueue_setup(d, alloc, index);
+}
+
 void qvirtio_reset(const QVirtioBus *bus, QVirtioDevice *d)
 {
     bus->set_status(d, QVIRTIO_RESET);
@@ -53,3 +70,86 @@ void qvirtio_set_driver(const QVirtioBus *bus, QVirtioDevice *d)
     g_assert_cmphex(bus->get_status(d), ==,
                                     QVIRTIO_DRIVER | QVIRTIO_ACKNOWLEDGE);
 }
+
+void qvirtio_set_driver_ok(const QVirtioBus *bus, QVirtioDevice *d)
+{
+    bus->set_status(d, bus->get_status(d) | QVIRTIO_DRIVER_OK);
+    g_assert_cmphex(bus->get_status(d), ==,
+                QVIRTIO_DRIVER_OK | QVIRTIO_DRIVER | QVIRTIO_ACKNOWLEDGE);
+}
+
+bool qvirtio_wait_isr(const QVirtioBus *bus, QVirtioDevice *d, uint8_t mask,
+                                                            uint64_t timeout)
+{
+    do {
+        clock_step(10);
+        if (bus->get_isr_status(d) & mask) {
+            break; /* It has ended */
+        }
+    } while (--timeout);
+
+    return timeout != 0;
+}
+
+void qvring_init(const QGuestAllocator *alloc, QVirtQueue *vq, uint64_t addr)
+{
+    int i;
+
+    vq->desc = addr;
+    vq->avail = vq->desc + vq->size*sizeof(QVRingDesc);
+    vq->used = (uint64_t)((vq->avail + sizeof(uint16_t) * (3 + vq->size)
+        + vq->align - 1) & ~(vq->align - 1));
+
+    for (i = 0; i < vq->size - 1; i++) {
+        /* vq->desc[i].addr */
+        writew(vq->desc + (16 * i), 0);
+        /* vq->desc[i].next */
+        writew(vq->desc + (16 * i) + 14, i + 1);
+    }
+
+    /* vq->avail->flags */
+    writew(vq->avail, 0);
+    /* vq->avail->idx */
+    writew(vq->avail + 2, 0);
+
+    /* vq->used->flags */
+    writew(vq->used, 0);
+}
+
+uint32_t qvirtqueue_add(QVirtQueue *vq, uint64_t data, uint32_t len, bool write,
+                                                                    bool next)
+{
+    uint16_t flags = 0;
+    vq->num_free--;
+
+    if (write) {
+        flags |= QVRING_DESC_F_WRITE;
+    }
+
+    if (next) {
+        flags |= QVRING_DESC_F_NEXT;
+    }
+
+    /* vq->desc[vq->free_head].addr */
+    writeq(vq->desc + (16 * vq->free_head), data);
+    /* vq->desc[vq->free_head].len */
+    writel(vq->desc + (16 * vq->free_head) + 8, len);
+    /* vq->desc[vq->free_head].flags */
+    writew(vq->desc + (16 * vq->free_head) + 12, flags);
+
+    return vq->free_head++; /* Return and increase, in this order */
+}
+
+void qvirtqueue_kick(const QVirtioBus *bus, QVirtioDevice *d, QVirtQueue *vq,
+                                                            uint32_t free_head)
+{
+    /* vq->avail->idx */
+    uint16_t idx = readl(vq->avail + 2);
+
+    /* vq->avail->ring[idx % vq->size] */
+    writel(vq->avail + 4 + (2 * (idx % vq->size)), free_head);
+    /* vq->avail->idx */
+    writel(vq->avail + 2, idx + 1);
+
+    bus->virtqueue_kick(d, vq);
+}
diff --git a/tests/libqos/virtio.h b/tests/libqos/virtio.h
index 8d7238b..aba5a1e 100644
--- a/tests/libqos/virtio.h
+++ b/tests/libqos/virtio.h
@@ -10,33 +10,117 @@
 #ifndef LIBQOS_VIRTIO_H
 #define LIBQOS_VIRTIO_H
 
+#include "libqos/malloc.h"
+
 #define QVIRTIO_VENDOR_ID       0x1AF4
 
 #define QVIRTIO_RESET           0x0
 #define QVIRTIO_ACKNOWLEDGE     0x1
 #define QVIRTIO_DRIVER          0x2
+#define QVIRTIO_DRIVER_OK       0x4
 
 #define QVIRTIO_NET_DEVICE_ID   0x1
 #define QVIRTIO_BLK_DEVICE_ID   0x2
 
+#define QVRING_DESC_F_NEXT      0x1
+#define QVRING_DESC_F_WRITE     0x2
+#define QVRING_DESC_F_INDIRECT  0x4
+
+#define QVIRTIO_F_NOTIFY_ON_EMPTY       0x01000000
+#define QVIRTIO_F_ANY_LAYOUT            0x08000000
+#define QVIRTIO_F_RING_INDIRECT_DESC    0x10000000
+#define QVIRTIO_F_RING_EVENT_IDX        0x20000000
+#define QVIRTIO_F_BAD_FEATURE           0x40000000
+
+#define QVRING_AVAIL_F_NO_INTERRUPT     1
+
+#define QVRING_USED_F_NO_NOTIFY     1
+
 typedef struct QVirtioDevice {
     /* Device type */
     uint16_t device_type;
 } QVirtioDevice;
 
+typedef struct QVRingDesc {
+    uint64_t addr;
+    uint32_t len;
+    uint16_t flags;
+    uint16_t next;
+} QVRingDesc;
+
+typedef struct QVRingAvail {
+    uint16_t flags;
+    uint16_t idx;
+    uint16_t ring[0]; /* This is an array of uint16_t */
+} QVRingAvail;
+
+typedef struct QVRingUsedElem {
+    uint32_t id;
+    uint32_t len;
+} QVRingUsedElem;
+
+typedef struct QVRingUsed {
+    uint16_t flags;
+    uint16_t idx;
+    QVRingUsedElem ring[0]; /* This is an array of QVRingUsedElem structs */
+} QVRingUsed;
+
+typedef struct QVirtQueue {
+    uint64_t desc; /* This points to an array of QVRingDesc */
+    uint64_t avail; /* This points to a QVRingAvail */
+    uint64_t used; /* This points to a QVRingDesc */
+    uint16_t index;
+    uint32_t size;
+    uint32_t free_head;
+    uint32_t num_free;
+    uint32_t align;
+} QVirtQueue;
+
 typedef struct QVirtioBus {
     uint8_t (*config_readb)(QVirtioDevice *d, void *addr);
     uint16_t (*config_readw)(QVirtioDevice *d, void *addr);
     uint32_t (*config_readl)(QVirtioDevice *d, void *addr);
     uint64_t (*config_readq)(QVirtioDevice *d, void *addr);
 
+    /* Get features of the device */
+    uint32_t (*get_features)(QVirtioDevice *d);
+
+    /* Get features of the device */
+    void (*set_features)(QVirtioDevice *d, uint32_t features);
+
     /* Get status of the device */
     uint8_t (*get_status)(QVirtioDevice *d);
 
     /* Set status of the device  */
     void (*set_status)(QVirtioDevice *d, uint8_t status);
+
+    /* Get the ISR status of the device */
+    uint8_t (*get_isr_status)(QVirtioDevice *d);
+
+    /* Select a queue to work on */
+    void (*queue_select)(QVirtioDevice *d, uint16_t index);
+
+    /* Get the size of the selected queue */
+    uint16_t (*get_queue_size)(QVirtioDevice *d);
+
+    /* Set the address of the selected queue */
+    void (*set_queue_address)(QVirtioDevice *d, uint32_t pfn);
+
+    /* Setup the virtqueue specified by index */
+    QVirtQueue *(*virtqueue_setup)(QVirtioDevice *d, QGuestAllocator *alloc,
+                                                                uint16_t index);
+
+    /* Notify changes in virtqueue */
+    void (*virtqueue_kick)(QVirtioDevice *d, QVirtQueue *vq);
 } QVirtioBus;
 
+static inline uint32_t qvring_size(uint32_t num, uint32_t align)
+{
+    return ((sizeof(struct QVRingDesc) * num + sizeof(uint16_t) * (3 + num)
+        + align - 1) & ~(align - 1))
+        + sizeof(uint16_t) * 3 + sizeof(struct QVRingUsedElem) * num;
+}
+
 uint8_t qvirtio_config_readb(const QVirtioBus *bus, QVirtioDevice *d,
                                                                 void *addr);
 uint16_t qvirtio_config_readw(const QVirtioBus *bus, QVirtioDevice *d,
@@ -45,9 +129,24 @@ uint32_t qvirtio_config_readl(const QVirtioBus *bus, QVirtioDevice *d,
                                                                 void *addr);
 uint64_t qvirtio_config_readq(const QVirtioBus *bus, QVirtioDevice *d,
                                                                 void *addr);
+uint32_t qvirtio_get_features(const QVirtioBus *bus, QVirtioDevice *d);
+void qvirtio_set_features(const QVirtioBus *bus, QVirtioDevice *d,
+                                                            uint32_t features);
 
 void qvirtio_reset(const QVirtioBus *bus, QVirtioDevice *d);
 void qvirtio_set_acknowledge(const QVirtioBus *bus, QVirtioDevice *d);
 void qvirtio_set_driver(const QVirtioBus *bus, QVirtioDevice *d);
+void qvirtio_set_driver_ok(const QVirtioBus *bus, QVirtioDevice *d);
+
+bool qvirtio_wait_isr(const QVirtioBus *bus, QVirtioDevice *d, uint8_t mask,
+                                                            uint64_t timeout);
+QVirtQueue *qvirtqueue_setup(const QVirtioBus *bus, QVirtioDevice *d,
+                                        QGuestAllocator *alloc, uint16_t index);
+
+void qvring_init(const QGuestAllocator *alloc, QVirtQueue *vq, uint64_t addr);
+uint32_t qvirtqueue_add(QVirtQueue *vq, uint64_t data, uint32_t len, bool write,
+                                                                    bool next);
+void qvirtqueue_kick(const QVirtioBus *bus, QVirtioDevice *d, QVirtQueue *vq,
+                                                            uint32_t free_head);
 
 #endif
diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
index 649f7cf..b048938 100644
--- a/tests/virtio-blk-test.c
+++ b/tests/virtio-blk-test.c
@@ -17,10 +17,41 @@
 #include "libqos/virtio.h"
 #include "libqos/virtio-pci.h"
 #include "libqos/pci-pc.h"
+#include "libqos/malloc.h"
+#include "libqos/malloc-pc.h"
+#include "qemu/bswap.h"
 
-#define TEST_IMAGE_SIZE (64 * 1024 * 1024)
-#define PCI_SLOT    0x04
-#define PCI_FN      0x00
+#define QVIRTIO_BLK_F_BARRIER       0x00000001
+#define QVIRTIO_BLK_F_SIZE_MAX      0x00000002
+#define QVIRTIO_BLK_F_SEG_MAX       0x00000004
+#define QVIRTIO_BLK_F_GEOMETRY      0x00000010
+#define QVIRTIO_BLK_F_RO            0x00000020
+#define QVIRTIO_BLK_F_BLK_SIZE      0x00000040
+#define QVIRTIO_BLK_F_SCSI          0x00000080
+#define QVIRTIO_BLK_F_WCE           0x00000200
+#define QVIRTIO_BLK_F_TOPOLOGY      0x00000400
+#define QVIRTIO_BLK_F_CONFIG_WCE    0x00000800
+
+#define QVIRTIO_BLK_T_IN            0
+#define QVIRTIO_BLK_T_OUT           1
+#define QVIRTIO_BLK_T_SCSI_CMD      2
+#define QVIRTIO_BLK_T_SCSI_CMD_OUT  3
+#define QVIRTIO_BLK_T_FLUSH         4
+#define QVIRTIO_BLK_T_FLUSH_OUT     5
+#define QVIRTIO_BLK_T_GET_ID        8
+
+#define TEST_IMAGE_SIZE         (64 * 1024 * 1024)
+#define QVIRTIO_BLK_TIMEOUT     100
+#define PCI_SLOT                0x04
+#define PCI_FN                  0x00
+
+typedef struct QVirtioBlkReq {
+    uint32_t type;
+    uint32_t ioprio;
+    uint64_t sector;
+    char *data;
+    uint8_t status;
+} QVirtioBlkReq;
 
 static QPCIBus *test_start(void)
 {
@@ -66,12 +97,53 @@ static QVirtioPCIDevice *virtio_blk_init(QPCIBus *bus)
     return dev;
 }
 
+static inline void virtio_blk_fix_request(QVirtioBlkReq *req)
+{
+#ifdef HOST_WORDS_BIGENDIAN
+    bool host_endian = true;
+#else
+    bool host_endian = false;
+#endif
+
+    if (qtest_big_endian() != host_endian) {
+        req->type = bswap32(req->type);
+        req->ioprio = bswap32(req->ioprio);
+        req->sector = bswap64(req->sector);
+    }
+}
+
+static uint64_t virtio_blk_request(QGuestAllocator *alloc, QVirtioBlkReq *req,
+                                                            uint64_t data_size)
+{
+    uint64_t addr;
+    uint8_t status = 0xFF;
+
+    g_assert_cmpuint(data_size % 512, ==, 0);
+    addr = guest_alloc(alloc, sizeof(*req) + data_size);
+
+    virtio_blk_fix_request(req);
+
+    memwrite(addr, req, 16);
+    memwrite(addr + 16, req->data, data_size);
+    memwrite(addr + 16 + data_size, &status, sizeof(status));
+
+    return addr;
+}
+
 static void pci_basic(void)
 {
     QVirtioPCIDevice *dev;
     QPCIBus *bus;
+    QVirtQueue *vq;
+    QGuestAllocator *alloc;
+    QVirtioBlkReq req;
     void *addr;
+    uint64_t req_addr;
     uint64_t capacity;
+    uint32_t features;
+    uint32_t free_head;
+    uint8_t status;
+    char *data;
 
     bus = test_start();
 
@@ -83,6 +155,124 @@ static void pci_basic(void)
     capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, addr);
     g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512);
 
+    features = qvirtio_get_features(&qvirtio_pci, &dev->vdev);
+    features = features & ~(QVIRTIO_F_BAD_FEATURE |
+                    QVIRTIO_F_RING_INDIRECT_DESC | QVIRTIO_F_RING_EVENT_IDX |
+                            QVIRTIO_BLK_F_SCSI);
+    qvirtio_set_features(&qvirtio_pci, &dev->vdev, features);
+
+    alloc = pc_alloc_init();
+    vq = qvirtqueue_setup(&qvirtio_pci, &dev->vdev, alloc, 0);
+
+    qvirtio_set_driver_ok(&qvirtio_pci, &dev->vdev);
+
+    /* Write and read with 2 descriptor layout */
+    /* Write request */
+    req.type = QVIRTIO_BLK_T_OUT;
+    req.ioprio = 1;
+    req.sector = 0;
+    req.data = g_malloc0(512);
+    strcpy(req.data, "TEST");
+
+    req_addr = virtio_blk_request(alloc, &req, 512);
+
+    g_free(req.data);
+
+    free_head = qvirtqueue_add(vq, req_addr, 528, false, true);
+    qvirtqueue_add(vq, req_addr + 528, 1, true, false);
+    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, vq, free_head);
+
+    g_assert(qvirtio_wait_isr(&qvirtio_pci, &dev->vdev, 0x1,
+                                                        QVIRTIO_BLK_TIMEOUT));
+    status = readb(req_addr + 528);
+    g_assert_cmpint(status, ==, 0);
+
+    guest_free(alloc, req_addr);
+
+    /* Read request */
+    req.type = QVIRTIO_BLK_T_IN;
+    req.ioprio = 1;
+    req.sector = 0;
+    req.data = g_malloc0(512);
+
+    req_addr = virtio_blk_request(alloc, &req, 512);
+
+    g_free(req.data);
+
+    free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
+    qvirtqueue_add(vq, req_addr + 16, 513, true, false);
+
+    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, vq, free_head);
+
+    g_assert(qvirtio_wait_isr(&qvirtio_pci, &dev->vdev, 0x1,
+                                                        QVIRTIO_BLK_TIMEOUT));
+    status = readb(req_addr + 528);
+    g_assert_cmpint(status, ==, 0);
+
+    data = g_malloc0(512);
+    memread(req_addr + 16, data, 512);
+    g_assert_cmpstr(data, ==, "TEST");
+    g_free(data);
+
+    guest_free(alloc, req_addr);
+
+    /* Write and read with 3 descriptor layout */
+    /* Write request */
+    req.type = QVIRTIO_BLK_T_OUT;
+    req.ioprio = 1;
+    req.sector = 1;
+    req.data = g_malloc0(512);
+    strcpy(req.data, "TEST");
+
+    req_addr = virtio_blk_request(alloc, &req, 512);
+
+    g_free(req.data);
+
+    free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
+    qvirtqueue_add(vq, req_addr + 16, 512, false, true);
+    qvirtqueue_add(vq, req_addr + 528, 1, true, false);
+
+    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, vq, free_head);
+
+    g_assert(qvirtio_wait_isr(&qvirtio_pci, &dev->vdev, 0x1,
+                                                        QVIRTIO_BLK_TIMEOUT));
+    status = readb(req_addr + 528);
+    g_assert_cmpint(status, ==, 0);
+
+    guest_free(alloc, req_addr);
+
+    /* Read request */
+    req.type = QVIRTIO_BLK_T_IN;
+    req.ioprio = 1;
+    req.sector = 1;
+    req.data = g_malloc0(512);
+
+    req_addr = virtio_blk_request(alloc, &req, 512);
+
+    g_free(req.data);
+
+    free_head = qvirtqueue_add(vq, req_addr, 16, false, true);
+    qvirtqueue_add(vq, req_addr + 16, 512, true, true);
+    qvirtqueue_add(vq, req_addr + 528, 1, true, false);
+
+    qvirtqueue_kick(&qvirtio_pci, &dev->vdev, vq, free_head);
+
+    g_assert(qvirtio_wait_isr(&qvirtio_pci, &dev->vdev, 0x1,
+                                                        QVIRTIO_BLK_TIMEOUT));
+    status = readb(req_addr + 528);
+    g_assert_cmpint(status, ==, 0);
+
+    guest_free(alloc, req_addr);
+
+    data = g_malloc0(512);
+    memread(req_addr + 16, data, 512);
+    g_assert_cmpstr(data, ==, "TEST");
+    g_free(data);
+
+    guest_free(alloc, req_addr);
+
+    /* End test */
+    guest_free(alloc, vq->desc);
     qvirtio_pci_device_disable(dev);
     g_free(dev);
     test_end();
commit 46e0cf762985e0a85529efd454402998c5021212
Author: Marc MarÃ <marc.mari.barcelo at gmail.com>
Date:   Mon Sep 1 12:07:55 2014 +0200

    tests: Add virtio device initialization
    
    Add functions to read and write virtio header fields.
    Add status bit setting in virtio-blk-device.
    
    Signed-off-by: Marc MarÃ <marc.mari.barcelo at gmail.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/tests/Makefile b/tests/Makefile
index 0572633..d5db97b 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -299,7 +299,7 @@ libqos-obj-y += tests/libqos/i2c.o
 libqos-pc-obj-y = $(libqos-obj-y) tests/libqos/pci-pc.o
 libqos-pc-obj-y += tests/libqos/malloc-pc.o
 libqos-omap-obj-y = $(libqos-obj-y) tests/libqos/i2c-omap.o
-libqos-virtio-obj-y = $(libqos-obj-y) $(libqos-pc-obj-y) tests/libqos/virtio-pci.o
+libqos-virtio-obj-y = $(libqos-obj-y) $(libqos-pc-obj-y) tests/libqos/virtio.o tests/libqos/virtio-pci.o
 
 tests/rtc-test$(EXESUF): tests/rtc-test.o
 tests/m48t59-test$(EXESUF): tests/m48t59-test.o
diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c
index fde1b1f..1a37620 100644
--- a/tests/libqos/virtio-pci.c
+++ b/tests/libqos/virtio-pci.c
@@ -8,6 +8,7 @@
  */
 
 #include <glib.h>
+#include <stdio.h>
 #include "libqtest.h"
 #include "libqos/virtio.h"
 #include "libqos/virtio-pci.h"
@@ -55,6 +56,64 @@ static void qvirtio_pci_assign_device(QVirtioDevice *d, void *data)
     *vpcidev = (QVirtioPCIDevice *)d;
 }
 
+static uint8_t qvirtio_pci_config_readb(QVirtioDevice *d, void *addr)
+{
+    QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d;
+    return qpci_io_readb(dev->pdev, addr);
+}
+
+static uint16_t qvirtio_pci_config_readw(QVirtioDevice *d, void *addr)
+{
+    QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d;
+    return qpci_io_readw(dev->pdev, addr);
+}
+
+static uint32_t qvirtio_pci_config_readl(QVirtioDevice *d, void *addr)
+{
+    QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d;
+    return qpci_io_readl(dev->pdev, addr);
+}
+
+static uint64_t qvirtio_pci_config_readq(QVirtioDevice *d, void *addr)
+{
+    QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d;
+    int i;
+    uint64_t u64 = 0;
+
+    if (qtest_big_endian()) {
+        for (i = 0; i < 8; ++i) {
+            u64 |= (uint64_t)qpci_io_readb(dev->pdev, addr + i) << (7 - i) * 8;
+        }
+    } else {
+        for (i = 0; i < 8; ++i) {
+            u64 |= (uint64_t)qpci_io_readb(dev->pdev, addr + i) << i * 8;
+        }
+    }
+
+    return u64;
+}
+
+static uint8_t qvirtio_pci_get_status(QVirtioDevice *d)
+{
+    QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d;
+    return qpci_io_readb(dev->pdev, dev->addr + QVIRTIO_DEVICE_STATUS);
+}
+
+static void qvirtio_pci_set_status(QVirtioDevice *d, uint8_t status)
+{
+    QVirtioPCIDevice *dev = (QVirtioPCIDevice *)d;
+    qpci_io_writeb(dev->pdev, dev->addr + QVIRTIO_DEVICE_STATUS, status);
+}
+
+const QVirtioBus qvirtio_pci = {
+    .config_readb = qvirtio_pci_config_readb,
+    .config_readw = qvirtio_pci_config_readw,
+    .config_readl = qvirtio_pci_config_readl,
+    .config_readq = qvirtio_pci_config_readq,
+    .get_status = qvirtio_pci_get_status,
+    .set_status = qvirtio_pci_set_status,
+};
+
 void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type,
                 void (*func)(QVirtioDevice *d, void *data), void *data)
 {
@@ -73,3 +132,15 @@ QVirtioPCIDevice *qvirtio_pci_device_find(QPCIBus *bus, uint16_t device_type)
 
     return dev;
 }
+
+void qvirtio_pci_device_enable(QVirtioPCIDevice *d)
+{
+    qpci_device_enable(d->pdev);
+    d->addr = qpci_iomap(d->pdev, 0, NULL);
+    g_assert(d->addr != NULL);
+}
+
+void qvirtio_pci_device_disable(QVirtioPCIDevice *d)
+{
+    qpci_iounmap(d->pdev, d->addr);
+}
diff --git a/tests/libqos/virtio-pci.h b/tests/libqos/virtio-pci.h
index 5101abb..26f902e 100644
--- a/tests/libqos/virtio-pci.h
+++ b/tests/libqos/virtio-pci.h
@@ -13,12 +13,30 @@
 #include "libqos/virtio.h"
 #include "libqos/pci.h"
 
+#define QVIRTIO_DEVICE_FEATURES         0x00
+#define QVIRTIO_GUEST_FEATURES          0x04
+#define QVIRTIO_QUEUE_ADDRESS           0x08
+#define QVIRTIO_QUEUE_SIZE              0x0C
+#define QVIRTIO_QUEUE_SELECT            0x0E
+#define QVIRTIO_QUEUE_NOTIFY            0x10
+#define QVIRTIO_DEVICE_STATUS           0x12
+#define QVIRTIO_ISR_STATUS              0x13
+#define QVIRTIO_MSIX_CONF_VECTOR        0x14
+#define QVIRTIO_MSIX_QUEUE_VECTOR       0x16
+#define QVIRTIO_DEVICE_SPECIFIC_MSIX    0x18
+#define QVIRTIO_DEVICE_SPECIFIC_NO_MSIX 0x14
+
 typedef struct QVirtioPCIDevice {
     QVirtioDevice vdev;
     QPCIDevice *pdev;
+    void *addr;
 } QVirtioPCIDevice;
 
+extern const QVirtioBus qvirtio_pci;
+
 void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type,
                 void (*func)(QVirtioDevice *d, void *data), void *data);
 QVirtioPCIDevice *qvirtio_pci_device_find(QPCIBus *bus, uint16_t device_type);
+void qvirtio_pci_device_enable(QVirtioPCIDevice *d);
+void qvirtio_pci_device_disable(QVirtioPCIDevice *d);
 #endif
diff --git a/tests/libqos/virtio.c b/tests/libqos/virtio.c
new file mode 100644
index 0000000..577d679
--- /dev/null
+++ b/tests/libqos/virtio.c
@@ -0,0 +1,55 @@
+/*
+ * libqos virtio driver
+ *
+ * Copyright (c) 2014 Marc MarÃ
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <glib.h>
+#include "libqtest.h"
+#include "libqos/virtio.h"
+
+uint8_t qvirtio_config_readb(const QVirtioBus *bus, QVirtioDevice *d,
+                                                                void *addr)
+{
+    return bus->config_readb(d, addr);
+}
+
+uint16_t qvirtio_config_readw(const QVirtioBus *bus, QVirtioDevice *d,
+                                                                void *addr)
+{
+    return bus->config_readw(d, addr);
+}
+
+uint32_t qvirtio_config_readl(const QVirtioBus *bus, QVirtioDevice *d,
+                                                                void *addr)
+{
+    return bus->config_readl(d, addr);
+}
+
+uint64_t qvirtio_config_readq(const QVirtioBus *bus, QVirtioDevice *d,
+                                                                void *addr)
+{
+    return bus->config_readq(d, addr);
+}
+
+void qvirtio_reset(const QVirtioBus *bus, QVirtioDevice *d)
+{
+    bus->set_status(d, QVIRTIO_RESET);
+    g_assert_cmphex(bus->get_status(d), ==, QVIRTIO_RESET);
+}
+
+void qvirtio_set_acknowledge(const QVirtioBus *bus, QVirtioDevice *d)
+{
+    bus->set_status(d, bus->get_status(d) | QVIRTIO_ACKNOWLEDGE);
+    g_assert_cmphex(bus->get_status(d), ==, QVIRTIO_ACKNOWLEDGE);
+}
+
+void qvirtio_set_driver(const QVirtioBus *bus, QVirtioDevice *d)
+{
+    bus->set_status(d, bus->get_status(d) | QVIRTIO_DRIVER);
+    g_assert_cmphex(bus->get_status(d), ==,
+                                    QVIRTIO_DRIVER | QVIRTIO_ACKNOWLEDGE);
+}
diff --git a/tests/libqos/virtio.h b/tests/libqos/virtio.h
index 2a05798..8d7238b 100644
--- a/tests/libqos/virtio.h
+++ b/tests/libqos/virtio.h
@@ -12,6 +12,10 @@
 
 #define QVIRTIO_VENDOR_ID       0x1AF4
 
+#define QVIRTIO_RESET           0x0
+#define QVIRTIO_ACKNOWLEDGE     0x1
+#define QVIRTIO_DRIVER          0x2
+
 #define QVIRTIO_NET_DEVICE_ID   0x1
 #define QVIRTIO_BLK_DEVICE_ID   0x2
 
@@ -20,4 +24,30 @@ typedef struct QVirtioDevice {
     uint16_t device_type;
 } QVirtioDevice;
 
+typedef struct QVirtioBus {
+    uint8_t (*config_readb)(QVirtioDevice *d, void *addr);
+    uint16_t (*config_readw)(QVirtioDevice *d, void *addr);
+    uint32_t (*config_readl)(QVirtioDevice *d, void *addr);
+    uint64_t (*config_readq)(QVirtioDevice *d, void *addr);
+
+    /* Get status of the device */
+    uint8_t (*get_status)(QVirtioDevice *d);
+
+    /* Set status of the device  */
+    void (*set_status)(QVirtioDevice *d, uint8_t status);
+} QVirtioBus;
+
+uint8_t qvirtio_config_readb(const QVirtioBus *bus, QVirtioDevice *d,
+                                                                void *addr);
+uint16_t qvirtio_config_readw(const QVirtioBus *bus, QVirtioDevice *d,
+                                                                void *addr);
+uint32_t qvirtio_config_readl(const QVirtioBus *bus, QVirtioDevice *d,
+                                                                void *addr);
+uint64_t qvirtio_config_readq(const QVirtioBus *bus, QVirtioDevice *d,
+                                                                void *addr);
+
+void qvirtio_reset(const QVirtioBus *bus, QVirtioDevice *d);
+void qvirtio_set_acknowledge(const QVirtioBus *bus, QVirtioDevice *d);
+void qvirtio_set_driver(const QVirtioBus *bus, QVirtioDevice *d);
+
 #endif
diff --git a/tests/libqtest.c b/tests/libqtest.c
index 5e458e8..9a92aa7 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -696,3 +696,51 @@ void qmp_discard_response(const char *fmt, ...)
     qtest_qmpv_discard_response(global_qtest, fmt, ap);
     va_end(ap);
 }
+
+bool qtest_big_endian(void)
+{
+    const char *arch = qtest_get_arch();
+    int i;
+
+    static const struct {
+        const char *arch;
+        bool big_endian;
+    } endianness[] = {
+        { "aarch64", false },
+        { "alpha", false },
+        { "arm", false },
+        { "cris", false },
+        { "i386", false },
+        { "lm32", true },
+        { "m68k", true },
+        { "microblaze", true },
+        { "microblazeel", false },
+        { "mips", true },
+        { "mips64", true },
+        { "mips64el", false },
+        { "mipsel", false },
+        { "moxie", true },
+        { "or32", true },
+        { "ppc", true },
+        { "ppc64", true },
+        { "ppcemb", true },
+        { "s390x", true },
+        { "sh4", false },
+        { "sh4eb", true },
+        { "sparc", true },
+        { "sparc64", true },
+        { "unicore32", false },
+        { "x86_64", false },
+        { "xtensa", false },
+        { "xtensaeb", true },
+        {},
+    };
+
+    for (i = 0; endianness[i].arch; i++) {
+        if (strcmp(endianness[i].arch, arch) == 0) {
+            return endianness[i].big_endian;
+        }
+    }
+
+    return false;
+}
diff --git a/tests/libqtest.h b/tests/libqtest.h
index 1be0934..3e12cab 100644
--- a/tests/libqtest.h
+++ b/tests/libqtest.h
@@ -682,4 +682,11 @@ static inline int64_t clock_set(int64_t val)
     return qtest_clock_set(global_qtest, val);
 }
 
+/**
+ * qtest_big_endian:
+ *
+ * Returns: True if the architecture under test has a big endian configuration.
+ */
+bool qtest_big_endian(void);
+
 #endif
diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
index 4d87a3e..649f7cf 100644
--- a/tests/virtio-blk-test.c
+++ b/tests/virtio-blk-test.c
@@ -49,18 +49,41 @@ static void test_end(void)
     qtest_end();
 }
 
-static void pci_basic(void)
+static QVirtioPCIDevice *virtio_blk_init(QPCIBus *bus)
 {
     QVirtioPCIDevice *dev;
-    QPCIBus *bus;
-
-    bus = test_start();
 
     dev = qvirtio_pci_device_find(bus, QVIRTIO_BLK_DEVICE_ID);
     g_assert(dev != NULL);
     g_assert_cmphex(dev->vdev.device_type, ==, QVIRTIO_BLK_DEVICE_ID);
     g_assert_cmphex(dev->pdev->devfn, ==, ((PCI_SLOT << 3) | PCI_FN));
 
+    qvirtio_pci_device_enable(dev);
+    qvirtio_reset(&qvirtio_pci, &dev->vdev);
+    qvirtio_set_acknowledge(&qvirtio_pci, &dev->vdev);
+    qvirtio_set_driver(&qvirtio_pci, &dev->vdev);
+
+    return dev;
+}
+
+static void pci_basic(void)
+{
+    QVirtioPCIDevice *dev;
+    QPCIBus *bus;
+    void *addr;
+    uint64_t capacity;
+
+    bus = test_start();
+
+    dev = virtio_blk_init(bus);
+
+    /* MSI-X is not enabled */
+    addr = dev->addr + QVIRTIO_DEVICE_SPECIFIC_NO_MSIX;
+
+    capacity = qvirtio_config_readq(&qvirtio_pci, &dev->vdev, addr);
+    g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512);
+
+    qvirtio_pci_device_disable(dev);
     g_free(dev);
     test_end();
 }
commit 311e666aea7164b6d3b8a7e845fb32a509bfdf08
Author: Marc MarÃ <marc.mari.barcelo at gmail.com>
Date:   Mon Sep 1 12:07:54 2014 +0200

    tests: Functions bus_foreach and device_find from libqos virtio API
    
    Virtio header has been changed to compile and work with a real device.
    Functions bus_foreach and device_find have been implemented for PCI.
    Virtio-blk test case now opens a fake device.
    
    Reviewed-by: Stefan Hajnoczi <stefanha at redhat.com>
    Signed-off-by: Marc MarÃ <marc.mari.barcelo at gmail.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/tests/Makefile b/tests/Makefile
index 469c0a5..0572633 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -299,6 +299,7 @@ libqos-obj-y += tests/libqos/i2c.o
 libqos-pc-obj-y = $(libqos-obj-y) tests/libqos/pci-pc.o
 libqos-pc-obj-y += tests/libqos/malloc-pc.o
 libqos-omap-obj-y = $(libqos-obj-y) tests/libqos/i2c-omap.o
+libqos-virtio-obj-y = $(libqos-obj-y) $(libqos-pc-obj-y) tests/libqos/virtio-pci.o
 
 tests/rtc-test$(EXESUF): tests/rtc-test.o
 tests/m48t59-test$(EXESUF): tests/m48t59-test.o
@@ -320,7 +321,7 @@ tests/vmxnet3-test$(EXESUF): tests/vmxnet3-test.o
 tests/ne2000-test$(EXESUF): tests/ne2000-test.o
 tests/wdt_ib700-test$(EXESUF): tests/wdt_ib700-test.o
 tests/virtio-balloon-test$(EXESUF): tests/virtio-balloon-test.o
-tests/virtio-blk-test$(EXESUF): tests/virtio-blk-test.o
+tests/virtio-blk-test$(EXESUF): tests/virtio-blk-test.o $(libqos-virtio-obj-y)
 tests/virtio-net-test$(EXESUF): tests/virtio-net-test.o
 tests/virtio-rng-test$(EXESUF): tests/virtio-rng-test.o
 tests/virtio-scsi-test$(EXESUF): tests/virtio-scsi-test.o
diff --git a/tests/libqos/virtio-pci.c b/tests/libqos/virtio-pci.c
new file mode 100644
index 0000000..fde1b1f
--- /dev/null
+++ b/tests/libqos/virtio-pci.c
@@ -0,0 +1,75 @@
+/*
+ * libqos virtio PCI driver
+ *
+ * Copyright (c) 2014 Marc MarÃ
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <glib.h>
+#include "libqtest.h"
+#include "libqos/virtio.h"
+#include "libqos/virtio-pci.h"
+#include "libqos/pci.h"
+#include "libqos/pci-pc.h"
+
+#include "hw/pci/pci_regs.h"
+
+typedef struct QVirtioPCIForeachData {
+    void (*func)(QVirtioDevice *d, void *data);
+    uint16_t device_type;
+    void *user_data;
+} QVirtioPCIForeachData;
+
+static QVirtioPCIDevice *qpcidevice_to_qvirtiodevice(QPCIDevice *pdev)
+{
+    QVirtioPCIDevice *vpcidev;
+    vpcidev = g_malloc0(sizeof(*vpcidev));
+
+    if (pdev) {
+        vpcidev->pdev = pdev;
+        vpcidev->vdev.device_type =
+                            qpci_config_readw(vpcidev->pdev, PCI_SUBSYSTEM_ID);
+    }
+
+    return vpcidev;
+}
+
+static void qvirtio_pci_foreach_callback(
+                        QPCIDevice *dev, int devfn, void *data)
+{
+    QVirtioPCIForeachData *d = data;
+    QVirtioPCIDevice *vpcidev = qpcidevice_to_qvirtiodevice(dev);
+
+    if (vpcidev->vdev.device_type == d->device_type) {
+        d->func(&vpcidev->vdev, d->user_data);
+    } else {
+        g_free(vpcidev);
+    }
+}
+
+static void qvirtio_pci_assign_device(QVirtioDevice *d, void *data)
+{
+    QVirtioPCIDevice **vpcidev = data;
+    *vpcidev = (QVirtioPCIDevice *)d;
+}
+
+void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type,
+                void (*func)(QVirtioDevice *d, void *data), void *data)
+{
+    QVirtioPCIForeachData d = { .func = func,
+                                .device_type = device_type,
+                                .user_data = data };
+
+    qpci_device_foreach(bus, QVIRTIO_VENDOR_ID, -1,
+                                qvirtio_pci_foreach_callback, &d);
+}
+
+QVirtioPCIDevice *qvirtio_pci_device_find(QPCIBus *bus, uint16_t device_type)
+{
+    QVirtioPCIDevice *dev = NULL;
+    qvirtio_pci_foreach(bus, device_type, qvirtio_pci_assign_device, &dev);
+
+    return dev;
+}
diff --git a/tests/libqos/virtio-pci.h b/tests/libqos/virtio-pci.h
new file mode 100644
index 0000000..5101abb
--- /dev/null
+++ b/tests/libqos/virtio-pci.h
@@ -0,0 +1,24 @@
+/*
+ * libqos virtio PCI definitions
+ *
+ * Copyright (c) 2014 Marc MarÃ
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef LIBQOS_VIRTIO_PCI_H
+#define LIBQOS_VIRTIO_PCI_H
+
+#include "libqos/virtio.h"
+#include "libqos/pci.h"
+
+typedef struct QVirtioPCIDevice {
+    QVirtioDevice vdev;
+    QPCIDevice *pdev;
+} QVirtioPCIDevice;
+
+void qvirtio_pci_foreach(QPCIBus *bus, uint16_t device_type,
+                void (*func)(QVirtioDevice *d, void *data), void *data);
+QVirtioPCIDevice *qvirtio_pci_device_find(QPCIBus *bus, uint16_t device_type);
+#endif
diff --git a/tests/libqos/virtio.h b/tests/libqos/virtio.h
new file mode 100644
index 0000000..2a05798
--- /dev/null
+++ b/tests/libqos/virtio.h
@@ -0,0 +1,23 @@
+/*
+ * libqos virtio definitions
+ *
+ * Copyright (c) 2014 Marc MarÃ
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef LIBQOS_VIRTIO_H
+#define LIBQOS_VIRTIO_H
+
+#define QVIRTIO_VENDOR_ID       0x1AF4
+
+#define QVIRTIO_NET_DEVICE_ID   0x1
+#define QVIRTIO_BLK_DEVICE_ID   0x2
+
+typedef struct QVirtioDevice {
+    /* Device type */
+    uint16_t device_type;
+} QVirtioDevice;
+
+#endif
diff --git a/tests/virtio-blk-test.c b/tests/virtio-blk-test.c
index d53f875..4d87a3e 100644
--- a/tests/virtio-blk-test.c
+++ b/tests/virtio-blk-test.c
@@ -2,6 +2,7 @@
  * QTest testcase for VirtIO Block Device
  *
  * Copyright (c) 2014 SUSE LINUX Products GmbH
+ * Copyright (c) 2014 Marc MarÃ
  *
  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  * See the COPYING file in the top-level directory.
@@ -9,12 +10,59 @@
 
 #include <glib.h>
 #include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
 #include "libqtest.h"
-#include "qemu/osdep.h"
+#include "libqos/virtio.h"
+#include "libqos/virtio-pci.h"
+#include "libqos/pci-pc.h"
 
-/* Tests only initialization so far. TODO: Replace with functional tests */
-static void pci_nop(void)
+#define TEST_IMAGE_SIZE (64 * 1024 * 1024)
+#define PCI_SLOT    0x04
+#define PCI_FN      0x00
+
+static QPCIBus *test_start(void)
 {
+    char cmdline[100];
+    char tmp_path[] = "/tmp/qtest.XXXXXX";
+    int fd, ret;
+
+    /* Create a temporary raw image */
+    fd = mkstemp(tmp_path);
+    g_assert_cmpint(fd, >=, 0);
+    ret = ftruncate(fd, TEST_IMAGE_SIZE);
+    g_assert_cmpint(ret, ==, 0);
+    close(fd);
+
+    snprintf(cmdline, 100, "-drive if=none,id=drive0,file=%s "
+                            "-device virtio-blk-pci,drive=drive0,addr=%x.%x",
+                            tmp_path, PCI_SLOT, PCI_FN);
+    qtest_start(cmdline);
+    unlink(tmp_path);
+
+    return qpci_init_pc();
+}
+
+static void test_end(void)
+{
+    qtest_end();
+}
+
+static void pci_basic(void)
+{
+    QVirtioPCIDevice *dev;
+    QPCIBus *bus;
+
+    bus = test_start();
+
+    dev = qvirtio_pci_device_find(bus, QVIRTIO_BLK_DEVICE_ID);
+    g_assert(dev != NULL);
+    g_assert_cmphex(dev->vdev.device_type, ==, QVIRTIO_BLK_DEVICE_ID);
+    g_assert_cmphex(dev->pdev->devfn, ==, ((PCI_SLOT << 3) | PCI_FN));
+
+    g_free(dev);
+    test_end();
 }
 
 int main(int argc, char **argv)
@@ -22,13 +70,10 @@ int main(int argc, char **argv)
     int ret;
 
     g_test_init(&argc, &argv, NULL);
-    qtest_add_func("/virtio/blk/pci/nop", pci_nop);
 
-    qtest_start("-drive id=drv0,if=none,file=/dev/null "
-                "-device virtio-blk-pci,drive=drv0");
-    ret = g_test_run();
+    g_test_add_func("/virtio/blk/pci/basic", pci_basic);
 
-    qtest_end();
+    ret = g_test_run();
 
     return ret;
 }
commit 4c0cfc72b31a79f737a64ebbe0411e4b83e25771
Author: Laszlo Ersek <lersek at redhat.com>
Date:   Sat Aug 23 12:19:07 2014 +0200

    pflash_cfi01: write flash contents to bdrv on incoming migration
    
    A drive that backs a pflash device is special:
    - it is very small,
    - its entire contents are kept in a RAMBlock at all times, covering the
      guest-phys address range that provides the guest's view of the emulated
      flash chip.
    
    The pflash device model keeps the drive (the host-side file) and the
    guest-visible flash contents in sync. When migrating the guest, the
    guest-visible flash contents (the RAMBlock) is migrated by default, but on
    the target host, the drive (the host-side file) remains in full sync with
    the RAMBlock only if:
    - the source and target hosts share the storage underlying the pflash
      drive,
    - or the migration requests full or incremental block migration too, which
      then covers all drives.
    
    Due to the special nature of pflash drives, the following scenario makes
    sense as well:
    - no full nor incremental block migration, covering all drives, alongside
      the base migration (justified eg. by shared storage for "normal" (big)
      drives),
    - non-shared storage for pflash drives.
    
    In this case, currently only those portions of the flash drive are updated
    on the target disk that the guest reprograms while running on the target
    host.
    
    In order to restore accord, dump the entire flash contents to the bdrv in
    a post_load() callback.
    
    - The read-only check follows the other call-sites of pflash_update();
    - both "pfl->ro" and pflash_update() reflect / consider the case when
      "pfl->bs" is NULL;
    - the total size of the flash device is calculated as in
      pflash_cfi01_realize().
    
    When using shared storage, or requesting full or incremental block
    migration along with the normal migration, the patch should incur a
    harmless rewrite from the target side.
    
    It is assumed that, on the target host, RAM is loaded ahead of the call to
    pflash_post_load().
    
    Suggested-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Laszlo Ersek <lersek at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
index fddef39..593fbc5 100644
--- a/hw/block/pflash_cfi01.c
+++ b/hw/block/pflash_cfi01.c
@@ -94,10 +94,13 @@ struct pflash_t {
     void *storage;
 };
 
+static int pflash_post_load(void *opaque, int version_id);
+
 static const VMStateDescription vmstate_pflash = {
     .name = "pflash_cfi01",
     .version_id = 1,
     .minimum_version_id = 1,
+    .post_load = pflash_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_UINT8(wcycle, pflash_t),
         VMSTATE_UINT8(cmd, pflash_t),
@@ -982,3 +985,14 @@ MemoryRegion *pflash_cfi01_get_memory(pflash_t *fl)
 {
     return &fl->mem;
 }
+
+static int pflash_post_load(void *opaque, int version_id)
+{
+    pflash_t *pfl = opaque;
+
+    if (!pfl->ro) {
+        DPRINTF("%s: updating bdrv for %s\n", __func__, pfl->name);
+        pflash_update(pfl, 0, pfl->sector_len * pfl->nb_blocs);
+    }
+    return 0;
+}
commit afeb25f9263e470ad715cab2b79b8965c0519fb7
Author: Laszlo Ersek <lersek at redhat.com>
Date:   Sat Aug 23 12:19:06 2014 +0200

    pflash_cfi01: fixup stale DPRINTF() calls
    
    Signed-off-by: Laszlo Ersek <lersek at redhat.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
index 2238f39..fddef39 100644
--- a/hw/block/pflash_cfi01.c
+++ b/hw/block/pflash_cfi01.c
@@ -209,11 +209,11 @@ static uint32_t pflash_devid_query(pflash_t *pfl, hwaddr offset)
     switch (boff & 0xFF) {
     case 0:
         resp = pfl->ident0;
-        DPRINTF("%s: Manufacturer Code %04x\n", __func__, ret);
+        DPRINTF("%s: Manufacturer Code %04x\n", __func__, resp);
         break;
     case 1:
         resp = pfl->ident1;
-        DPRINTF("%s: Device ID Code %04x\n", __func__, ret);
+        DPRINTF("%s: Device ID Code %04x\n", __func__, resp);
         break;
     default:
         DPRINTF("%s: Read Device Information offset=%x\n", __func__,
commit 6bb4515849315460bdea45fa56ca656cead7c8ed
Author: Liu Yuan <namei.unix at gmail.com>
Date:   Mon Sep 1 13:35:21 2014 +0800

    block: kill tail whitespace in block.c
    
    Cc: Kevin Wolf <kwolf at redhat.com>
    Cc: Stefan Hajnoczi <stefanha at redhat.com>
    Signed-off-by: Liu Yuan <namei.unix at gmail.com>
    Reviewed-by: BenoÃ®t Canet <benoit.canet at nodalink.com>
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>

diff --git a/block.c b/block.c
index cb670fd..d06dd51 100644
--- a/block.c
+++ b/block.c
@@ -2246,7 +2246,7 @@ int bdrv_commit(BlockDriverState *bs)
 
     if (!drv)
         return -ENOMEDIUM;
-    
+
     if (!bs->backing_hd) {
         return -ENOTSUP;
     }