[Spice-commits] 53 commits - block/iscsi.c block/nbd.c cputlb.c dma-helpers.c exec.c hw/block hw/core hw/display hw/dma hw/i386 hw/ide hw/intc hw/microblaze hw/misc hw/pci hw/ppc hw/s390x hw/scsi hw/sd hw/usb hw/virtio hw/watchdog hw/xen include/exec include/hw include/qemu include/sysemu kvm-all.c memory.c net/slirp.c qemu-options.hx slirp/tcp_input.c target-s390x/cpu.h target-sparc/mmu_helper.c translate-all.c util/qemu-option.c util/qemu-sockets.c vl.c xen-all.c

Gerd Hoffmann kraxel at kemper.freedesktop.org
Mon Jun 24 00:53:39 PDT 2013


 block/iscsi.c                            |  129 +++-----
 block/nbd.c                              |   11 
 cputlb.c                                 |    4 
 dma-helpers.c                            |  190 ------------
 exec.c                                   |  458 ++++++++++++++++---------------
 hw/block/nvme.c                          |    2 
 hw/core/loader.c                         |    2 
 hw/display/exynos4210_fimd.c             |    4 
 hw/display/framebuffer.c                 |    3 
 hw/dma/pl330.c                           |    8 
 hw/i386/pc.c                             |    7 
 hw/ide/ahci.c                            |   18 -
 hw/ide/ahci.h                            |    4 
 hw/ide/ich.c                             |    2 
 hw/ide/macio.c                           |    4 
 hw/intc/xilinx_intc.c                    |   28 +
 hw/microblaze/petalogix_s3adsp1800_mmu.c |    2 
 hw/misc/vfio.c                           |    6 
 hw/pci/pci.c                             |   37 +-
 hw/ppc/spapr_iommu.c                     |  119 +++-----
 hw/ppc/spapr_pci.c                       |   16 -
 hw/ppc/spapr_vio.c                       |   13 
 hw/s390x/s390-virtio-bus.c               |    2 
 hw/s390x/virtio-ccw.c                    |    2 
 hw/scsi/megasas.c                        |    4 
 hw/scsi/scsi-disk.c                      |   17 -
 hw/scsi/scsi-generic.c                   |    8 
 hw/scsi/vhost-scsi.c                     |    2 
 hw/scsi/virtio-scsi.c                    |    2 
 hw/scsi/vmw_pvscsi.c                     |    2 
 hw/sd/sdhci.c                            |   22 -
 hw/usb/hcd-ehci-pci.c                    |    4 
 hw/usb/hcd-ehci-sysbus.c                 |    2 
 hw/usb/hcd-ehci.c                        |   12 
 hw/usb/hcd-ehci.h                        |    2 
 hw/usb/hcd-ohci.c                        |   30 +-
 hw/usb/libhw.c                           |    4 
 hw/virtio/dataplane/hostmem.c            |    2 
 hw/virtio/vhost.c                        |    4 
 hw/virtio/virtio-balloon.c               |    2 
 hw/virtio/virtio-pci.c                   |    2 
 hw/watchdog/wdt_i6300esb.c               |   19 +
 hw/xen/xen_pt.c                          |    6 
 include/exec/cputlb.h                    |    4 
 include/exec/memory-internal.h           |   15 -
 include/exec/memory.h                    |  114 ++++++-
 include/hw/hw.h                          |    4 
 include/hw/pci-host/spapr.h              |    3 
 include/hw/pci/pci.h                     |   21 -
 include/hw/pci/pci_bus.h                 |    4 
 include/hw/ppc/spapr.h                   |   12 
 include/hw/ppc/spapr_vio.h               |   21 -
 include/qemu/int128.h                    |   19 +
 include/qemu/option.h                    |    2 
 include/sysemu/dma.h                     |  144 ++-------
 kvm-all.c                                |   23 -
 memory.c                                 |   79 +++--
 net/slirp.c                              |   12 
 qemu-options.hx                          |    3 
 slirp/tcp_input.c                        |   26 +
 target-s390x/cpu.h                       |    5 
 target-sparc/mmu_helper.c                |    2 
 translate-all.c                          |   10 
 util/qemu-option.c                       |   30 --
 util/qemu-sockets.c                      |   13 
 vl.c                                     |  121 ++------
 xen-all.c                                |    6 
 67 files changed, 895 insertions(+), 1015 deletions(-)

New commits:
commit 576156ffed72ab4feb0b752979db86ff8759a2a1
Merge: b1588c3 2eb74e1
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Thu Jun 20 16:53:39 2013 -0500

    Merge remote-tracking branch 'bonzini/iommu-for-anthony' into staging
    
    # By Paolo Bonzini (12) and others
    # Via Paolo Bonzini
    * bonzini/iommu-for-anthony: (25 commits)
      memory: render_memory_region: factor out fr constant setters
      memory: as_update_topology_pass: Improve comments
      memory: Fix comment typo
      memory: give name to every AddressSpace
      dma: eliminate DMAContext
      spapr_vio: take care of creating our own AddressSpace/DMAContext
      pci: use memory core for iommu support
      dma: eliminate old-style IOMMU support
      spapr: use memory core for iommu support
      spapr: make IOMMU translation go through IOMMUTLBEntry
      spapr: convert TCE API to use an opaque type
      vfio: abort if an emulated iommu is used
      memory: Add iommu map/unmap notifiers
      memory: iommu support
      memory: make section size a 128-bit integer
      exec: reorganize mem_add to match Int128 version
      Revert "s390x: reduce TARGET_PHYS_ADDR_SPACE_BITS to 62"
      Revert "memory: limit sections in the radix tree to the actual address space size"
      exec: return MemoryRegion from address_space_translate
      exec: Implement subpage_read/write via address_space_rw
      ...
    
    Message-id: 1371739493-10187-1-git-send-email-pbonzini at redhat.com
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

commit b1588c3fd6daf6e23ba727c758f84ada279ae731
Merge: 4eda32f b5a87d2
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Thu Jun 20 16:53:08 2013 -0500

    Merge remote-tracking branch 'kiszka/queues/slirp' into staging
    
    # By Bas van Sisseren (1) and Gertjan Halkes (1)
    # Via Jan Kiszka
    * kiszka/queues/slirp:
      make user networking hostfwd work with restrict=y
      fix -net user checks by reordering checks
    
    Message-id: cover.1371638848.git.jan.kiszka at siemens.com
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

commit 2eb74e1a1ef145034aa41255c4a6f469d560c96d
Author: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
Date:   Mon Jun 3 15:33:29 2013 +1000

    memory: render_memory_region: factor out fr constant setters
    
    These 4 replicated lines set properties of fr that are constant over
    the course of the function. Factor out their repeated setting (and also
    guards against them being set multiple times in the loop below).
    
    Signed-off-by: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/memory.c b/memory.c
index 0e07d0f..47b005a 100644
--- a/memory.c
+++ b/memory.c
@@ -556,6 +556,11 @@ static void render_memory_region(FlatView *view,
     base = clip.start;
     remain = clip.size;
 
+    fr.mr = mr;
+    fr.dirty_log_mask = mr->dirty_log_mask;
+    fr.romd_mode = mr->romd_mode;
+    fr.readonly = readonly;
+
     /* Render the region itself into any gaps left by the current view. */
     for (i = 0; i < view->nr && int128_nz(remain); ++i) {
         if (int128_ge(base, addrrange_end(view->ranges[i].addr))) {
@@ -564,12 +569,8 @@ static void render_memory_region(FlatView *view,
         if (int128_lt(base, view->ranges[i].addr.start)) {
             now = int128_min(remain,
                              int128_sub(view->ranges[i].addr.start, base));
-            fr.mr = mr;
             fr.offset_in_region = offset_in_region;
             fr.addr = addrrange_make(base, now);
-            fr.dirty_log_mask = mr->dirty_log_mask;
-            fr.romd_mode = mr->romd_mode;
-            fr.readonly = readonly;
             flatview_insert(view, i, &fr);
             ++i;
             int128_addto(&base, now);
@@ -584,12 +585,8 @@ static void render_memory_region(FlatView *view,
         int128_subfrom(&remain, now);
     }
     if (int128_nz(remain)) {
-        fr.mr = mr;
         fr.offset_in_region = offset_in_region;
         fr.addr = addrrange_make(base, remain);
-        fr.dirty_log_mask = mr->dirty_log_mask;
-        fr.romd_mode = mr->romd_mode;
-        fr.readonly = readonly;
         flatview_insert(view, i, &fr);
     }
 }
commit 41a6e477fda3e3e5c8cc860458d29164625dc5e0
Author: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
Date:   Mon Jun 3 15:32:42 2013 +1000

    memory: as_update_topology_pass: Improve comments
    
    These comments were a little difficult to read. First one had
    incorrect parenthesis. The part about attributes changing is
    really applicable to the region being 'in both' rather than 'in
    new'
    
    Second comment has an obscure parenthetic about 'Logging may have
    changed'. Made clearer, as this if is supposed to handle the case where
    the memory region is unchanged (with the notable exception re logging).
    
    Signed-off-by: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/memory.c b/memory.c
index 221b725..0e07d0f 100644
--- a/memory.c
+++ b/memory.c
@@ -719,7 +719,7 @@ static void address_space_update_topology_pass(AddressSpace *as,
                 || int128_lt(frold->addr.start, frnew->addr.start)
                 || (int128_eq(frold->addr.start, frnew->addr.start)
                     && !flatrange_equal(frold, frnew)))) {
-            /* In old, but (not in new, or in new but attributes changed). */
+            /* In old but not in new, or in both but attributes changed. */
 
             if (!adding) {
                 MEMORY_LISTENER_UPDATE_REGION(frold, as, Reverse, region_del);
@@ -727,7 +727,7 @@ static void address_space_update_topology_pass(AddressSpace *as,
 
             ++iold;
         } else if (frold && frnew && flatrange_equal(frold, frnew)) {
-            /* In both (logging may have changed) */
+            /* In both and unchanged (except logging may have changed) */
 
             if (adding) {
                 MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_nop);
commit 8508e024cb31a4013bb462aca0235c963e785aeb
Author: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
Date:   Mon Jun 3 15:31:56 2013 +1000

    memory: Fix comment typo
    
    s/ajacent/adjacent
    
    Signed-off-by: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/memory.c b/memory.c
index 8e99b8a..221b725 100644
--- a/memory.c
+++ b/memory.c
@@ -282,7 +282,7 @@ static bool can_merge(FlatRange *r1, FlatRange *r2)
         && r1->readonly == r2->readonly;
 }
 
-/* Attempt to simplify a view by merging ajacent ranges */
+/* Attempt to simplify a view by merging adjacent ranges */
 static void flatview_simplify(FlatView *view)
 {
     unsigned i, j;
commit 7dca8043f3483ff34ac954c7012b721731ee5719
Author: Alexey Kardashevskiy <aik at ozlabs.ru>
Date:   Mon Apr 29 16:25:51 2013 +0000

    memory: give name to every AddressSpace
    
    The "info mtree" command in QEMU console prints only "memory" and "I/O"
    address spaces while there are actually a lot more other AddressSpace
    structs created by PCI and VIO devices. Those devices do not normally
    have names and therefore not present in "info mtree" output.
    
    The patch fixes this.
    
    Reviewed-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Alexey Kardashevskiy <aik at ozlabs.ru>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/exec.c b/exec.c
index a18f833..0b0118b 100644
--- a/exec.c
+++ b/exec.c
@@ -1827,13 +1827,11 @@ static void memory_map_init(void)
 {
     system_memory = g_malloc(sizeof(*system_memory));
     memory_region_init(system_memory, "system", INT64_MAX);
-    address_space_init(&address_space_memory, system_memory);
-    address_space_memory.name = "memory";
+    address_space_init(&address_space_memory, system_memory, "memory");
 
     system_io = g_malloc(sizeof(*system_io));
     memory_region_init(system_io, "io", 65536);
-    address_space_init(&address_space_io, system_io);
-    address_space_io.name = "I/O";
+    address_space_init(&address_space_io, system_io, "I/O");
 
     memory_listener_register(&core_memory_listener, &address_space_memory);
     memory_listener_register(&io_memory_listener, &address_space_io);
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 104fe71..61b681a 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -814,7 +814,8 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     memory_region_init_alias(&pci_dev->bus_master_enable_region, "bus master",
                              dma_as->root, 0, memory_region_size(dma_as->root));
     memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
-    address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region);
+    address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region,
+                       name);
 
     pci_dev->devfn = devfn;
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 459398c..04e8362 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -650,7 +650,9 @@ static int spapr_phb_init(SysBusDevice *s)
         fprintf(stderr, "Unable to create TCE table for %s\n", sphb->dtbusname);
         return -1;
     }
-    address_space_init(&sphb->iommu_as, spapr_tce_get_iommu(sphb->tcet));
+    address_space_init(&sphb->iommu_as, spapr_tce_get_iommu(sphb->tcet),
+                       sphb->dtbusname);
+
     pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
 
     QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 459c9f2..3c5a655 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -454,7 +454,7 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
     if (pc->rtce_window_size) {
         uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
         dev->tcet = spapr_tce_new_table(liobn, pc->rtce_window_size);
-        address_space_init(&dev->as, spapr_tce_get_iommu(dev->tcet));
+        address_space_init(&dev->as, spapr_tce_get_iommu(dev->tcet), qdev->id);
     }
 
     return pc->init(dev);
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 75fa2b6..3598c4f 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -192,7 +192,7 @@ struct MemoryRegionPortio {
  */
 struct AddressSpace {
     /* All fields are private. */
-    const char *name;
+    char *name;
     MemoryRegion *root;
     struct FlatView *current_map;
     int ioeventfd_nb;
@@ -892,8 +892,10 @@ void mtree_info(fprintf_function mon_printf, void *f);
  *
  * @as: an uninitialized #AddressSpace
  * @root: a #MemoryRegion that routes addesses for the address space
+ * @name: an address space name.  The name is only used for debugging
+ *        output.
  */
-void address_space_init(AddressSpace *as, MemoryRegion *root);
+void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name);
 
 
 /**
diff --git a/memory.c b/memory.c
index 2a94d7d..8e99b8a 100644
--- a/memory.c
+++ b/memory.c
@@ -1657,7 +1657,7 @@ void memory_listener_unregister(MemoryListener *listener)
     QTAILQ_REMOVE(&memory_listeners, listener, link);
 }
 
-void address_space_init(AddressSpace *as, MemoryRegion *root)
+void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)
 {
     memory_region_transaction_begin();
     as->root = root;
@@ -1666,7 +1666,7 @@ void address_space_init(AddressSpace *as, MemoryRegion *root)
     as->ioeventfd_nb = 0;
     as->ioeventfds = NULL;
     QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link);
-    as->name = NULL;
+    as->name = g_strdup(name ? name : "anonymous");
     address_space_init_dispatch(as);
     memory_region_update_pending |= root->enabled;
     memory_region_transaction_commit();
@@ -1681,6 +1681,7 @@ void address_space_destroy(AddressSpace *as)
     QTAILQ_REMOVE(&address_spaces, as, address_spaces_link);
     address_space_destroy_dispatch(as);
     flatview_destroy(as->current_map);
+    g_free(as->name);
     g_free(as->current_map);
     g_free(as->ioeventfds);
 }
@@ -1807,9 +1808,6 @@ void mtree_info(fprintf_function mon_printf, void *f)
     QTAILQ_INIT(&ml_head);
 
     QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
-        if (!as->name) {
-            continue;
-        }
         mon_printf(f, "%s\n", as->name);
         mtree_print_mr(mon_printf, f, as->root, 0, 0, &ml_head);
     }
commit df32fd1c9f53dd3b7abd28e29f851965039eabda
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Apr 10 18:15:49 2013 +0200

    dma: eliminate DMAContext
    
    The DMAContext is a simple pointer to an AddressSpace that is now always
    already available.  Make everyone hold the address space directly,
    and clean up the DMA API to use the AddressSpace directly.
    
    Reviewed-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/dma-helpers.c b/dma-helpers.c
index c53705a..5afba47 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -14,11 +14,9 @@
 
 /* #define DEBUG_IOMMU */
 
-int dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c, dma_addr_t len)
+int dma_memory_set(AddressSpace *as, dma_addr_t addr, uint8_t c, dma_addr_t len)
 {
-    AddressSpace *as = dma->as;
-
-    dma_barrier(dma, DMA_DIRECTION_FROM_DEVICE);
+    dma_barrier(as, DMA_DIRECTION_FROM_DEVICE);
 
 #define FILLBUF_SIZE 512
     uint8_t fillbuf[FILLBUF_SIZE];
@@ -36,13 +34,13 @@ int dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c, dma_addr_t len)
     return error;
 }
 
-void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma)
+void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, AddressSpace *as)
 {
     qsg->sg = g_malloc(alloc_hint * sizeof(ScatterGatherEntry));
     qsg->nsg = 0;
     qsg->nalloc = alloc_hint;
     qsg->size = 0;
-    qsg->dma = dma;
+    qsg->as = as;
 }
 
 void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
@@ -102,7 +100,7 @@ static void dma_bdrv_unmap(DMAAIOCB *dbs)
     int i;
 
     for (i = 0; i < dbs->iov.niov; ++i) {
-        dma_memory_unmap(dbs->sg->dma, dbs->iov.iov[i].iov_base,
+        dma_memory_unmap(dbs->sg->as, dbs->iov.iov[i].iov_base,
                          dbs->iov.iov[i].iov_len, dbs->dir,
                          dbs->iov.iov[i].iov_len);
     }
@@ -150,7 +148,7 @@ static void dma_bdrv_cb(void *opaque, int ret)
     while (dbs->sg_cur_index < dbs->sg->nsg) {
         cur_addr = dbs->sg->sg[dbs->sg_cur_index].base + dbs->sg_cur_byte;
         cur_len = dbs->sg->sg[dbs->sg_cur_index].len - dbs->sg_cur_byte;
-        mem = dma_memory_map(dbs->sg->dma, cur_addr, &cur_len, dbs->dir);
+        mem = dma_memory_map(dbs->sg->as, cur_addr, &cur_len, dbs->dir);
         if (!mem)
             break;
         qemu_iovec_add(&dbs->iov, mem, cur_len);
@@ -247,7 +245,7 @@ static uint64_t dma_buf_rw(uint8_t *ptr, int32_t len, QEMUSGList *sg,
     while (len > 0) {
         ScatterGatherEntry entry = sg->sg[sg_cur_index++];
         int32_t xfer = MIN(len, entry.len);
-        dma_memory_rw(sg->dma, entry.base, ptr, xfer, dir);
+        dma_memory_rw(sg->as, entry.base, ptr, xfer, dir);
         ptr += xfer;
         len -= xfer;
         resid -= xfer;
@@ -271,11 +269,3 @@ void dma_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
 {
     bdrv_acct_start(bs, cookie, sg->size, type);
 }
-
-void dma_context_init(DMAContext *dma, AddressSpace *as)
-{
-#ifdef DEBUG_IOMMU
-    fprintf(stderr, "dma_context_init(%p -> %p)\n", dma, as);
-#endif
-    dma->as = as;
-}
diff --git a/exec.c b/exec.c
index 89259c9..a18f833 100644
--- a/exec.c
+++ b/exec.c
@@ -63,7 +63,6 @@ static MemoryRegion *system_io;
 
 AddressSpace address_space_io;
 AddressSpace address_space_memory;
-DMAContext dma_context_memory;
 
 MemoryRegion io_mem_rom, io_mem_notdirty;
 static MemoryRegion io_mem_unassigned;
@@ -1839,8 +1838,6 @@ static void memory_map_init(void)
     memory_listener_register(&core_memory_listener, &address_space_memory);
     memory_listener_register(&io_memory_listener, &address_space_io);
     memory_listener_register(&tcg_memory_listener, &address_space_memory);
-
-    dma_context_init(&dma_context_memory, &address_space_memory);
 }
 
 MemoryRegion *get_system_memory(void)
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 5db941c..58b0d91 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -85,7 +85,7 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg, uint64_t prp1, uint64_t prp2,
         return NVME_INVALID_FIELD | NVME_DNR;
     }
 
-    qemu_sglist_init(qsg, num_prps, pci_dma_context(&n->parent_obj));
+    pci_dma_sglist_init(qsg, &n->parent_obj, num_prps);
     qemu_sglist_add(qsg, prp1, trans_len);
     len -= trans_len;
     if (len) {
diff --git a/hw/dma/pl330.c b/hw/dma/pl330.c
index 60f5299..044c087 100644
--- a/hw/dma/pl330.c
+++ b/hw/dma/pl330.c
@@ -1074,7 +1074,7 @@ static inline const PL330InsnDesc *pl330_fetch_insn(PL330Chan *ch)
     uint8_t opcode;
     int i;
 
-    dma_memory_read(&dma_context_memory, ch->pc, &opcode, 1);
+    dma_memory_read(&address_space_memory, ch->pc, &opcode, 1);
     for (i = 0; insn_desc[i].size; i++) {
         if ((opcode & insn_desc[i].opmask) == insn_desc[i].opcode) {
             return &insn_desc[i];
@@ -1088,7 +1088,7 @@ static inline void pl330_exec_insn(PL330Chan *ch, const PL330InsnDesc *insn)
     uint8_t buf[PL330_INSN_MAXSIZE];
 
     assert(insn->size <= PL330_INSN_MAXSIZE);
-    dma_memory_read(&dma_context_memory, ch->pc, buf, insn->size);
+    dma_memory_read(&address_space_memory, ch->pc, buf, insn->size);
     insn->exec(ch, buf[0], &buf[1], insn->size - 1);
 }
 
@@ -1153,7 +1153,7 @@ static int pl330_exec_cycle(PL330Chan *channel)
     if (q != NULL && q->len <= pl330_fifo_num_free(&s->fifo)) {
         int len = q->len - (q->addr & (q->len - 1));
 
-        dma_memory_read(&dma_context_memory, q->addr, buf, len);
+        dma_memory_read(&address_space_memory, q->addr, buf, len);
         if (PL330_ERR_DEBUG > 1) {
             DB_PRINT("PL330 read from memory @%08x (size = %08x):\n",
                       q->addr, len);
@@ -1185,7 +1185,7 @@ static int pl330_exec_cycle(PL330Chan *channel)
             fifo_res = pl330_fifo_get(&s->fifo, buf, len, q->tag);
         }
         if (fifo_res == PL330_FIFO_OK || q->z) {
-            dma_memory_write(&dma_context_memory, q->addr, buf, len);
+            dma_memory_write(&address_space_memory, q->addr, buf, len);
             if (PL330_ERR_DEBUG > 1) {
                 DB_PRINT("PL330 read from memory @%08x (size = %08x):\n",
                          q->addr, len);
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index eab6096..1adfa0b 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -597,7 +597,7 @@ static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis)
     if (!cmd_fis) {
         /* map cmd_fis */
         uint64_t tbl_addr = le64_to_cpu(ad->cur_cmd->tbl_addr);
-        cmd_fis = dma_memory_map(ad->hba->dma, tbl_addr, &cmd_len,
+        cmd_fis = dma_memory_map(ad->hba->as, tbl_addr, &cmd_len,
                                  DMA_DIRECTION_TO_DEVICE);
         cmd_mapped = 1;
     }
@@ -630,7 +630,7 @@ static void ahci_write_fis_d2h(AHCIDevice *ad, uint8_t *cmd_fis)
     ahci_trigger_irq(ad->hba, ad, PORT_IRQ_D2H_REG_FIS);
 
     if (cmd_mapped) {
-        dma_memory_unmap(ad->hba->dma, cmd_fis, cmd_len,
+        dma_memory_unmap(ad->hba->as, cmd_fis, cmd_len,
                          DMA_DIRECTION_TO_DEVICE, cmd_len);
     }
 }
@@ -657,7 +657,7 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, int offset)
     }
 
     /* map PRDT */
-    if (!(prdt = dma_memory_map(ad->hba->dma, prdt_addr, &prdt_len,
+    if (!(prdt = dma_memory_map(ad->hba->as, prdt_addr, &prdt_len,
                                 DMA_DIRECTION_TO_DEVICE))){
         DPRINTF(ad->port_no, "map failed\n");
         return -1;
@@ -691,7 +691,7 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, int offset)
             goto out;
         }
 
-        qemu_sglist_init(sglist, (sglist_alloc_hint - off_idx), ad->hba->dma);
+        qemu_sglist_init(sglist, (sglist_alloc_hint - off_idx), ad->hba->as);
         qemu_sglist_add(sglist, le64_to_cpu(tbl[off_idx].addr + off_pos),
                         le32_to_cpu(tbl[off_idx].flags_size) + 1 - off_pos);
 
@@ -703,7 +703,7 @@ static int ahci_populate_sglist(AHCIDevice *ad, QEMUSGList *sglist, int offset)
     }
 
 out:
-    dma_memory_unmap(ad->hba->dma, prdt, prdt_len,
+    dma_memory_unmap(ad->hba->as, prdt, prdt_len,
                      DMA_DIRECTION_TO_DEVICE, prdt_len);
     return r;
 }
@@ -836,7 +836,7 @@ static int handle_cmd(AHCIState *s, int port, int slot)
     tbl_addr = le64_to_cpu(cmd->tbl_addr);
 
     cmd_len = 0x80;
-    cmd_fis = dma_memory_map(s->dma, tbl_addr, &cmd_len,
+    cmd_fis = dma_memory_map(s->as, tbl_addr, &cmd_len,
                              DMA_DIRECTION_FROM_DEVICE);
 
     if (!cmd_fis) {
@@ -963,7 +963,7 @@ static int handle_cmd(AHCIState *s, int port, int slot)
     }
 
 out:
-    dma_memory_unmap(s->dma, cmd_fis, cmd_len, DMA_DIRECTION_FROM_DEVICE,
+    dma_memory_unmap(s->as, cmd_fis, cmd_len, DMA_DIRECTION_FROM_DEVICE,
                      cmd_len);
 
     if (s->dev[port].port.ifs[0].status & (BUSY_STAT|DRQ_STAT)) {
@@ -1145,12 +1145,12 @@ static const IDEDMAOps ahci_dma_ops = {
     .reset = ahci_dma_reset,
 };
 
-void ahci_init(AHCIState *s, DeviceState *qdev, DMAContext *dma, int ports)
+void ahci_init(AHCIState *s, DeviceState *qdev, AddressSpace *as, int ports)
 {
     qemu_irq *irqs;
     int i;
 
-    s->dma = dma;
+    s->as = as;
     s->ports = ports;
     s->dev = g_malloc0(sizeof(AHCIDevice) * ports);
     ahci_reg_init(s);
diff --git a/hw/ide/ahci.h b/hw/ide/ahci.h
index 85f37fe..341a571 100644
--- a/hw/ide/ahci.h
+++ b/hw/ide/ahci.h
@@ -297,7 +297,7 @@ typedef struct AHCIState {
     uint32_t idp_index;     /* Current IDP index */
     int32_t ports;
     qemu_irq irq;
-    DMAContext *dma;
+    AddressSpace *as;
 } AHCIState;
 
 typedef struct AHCIPCIState {
@@ -338,7 +338,7 @@ typedef struct NCQFrame {
     uint8_t reserved10;
 } QEMU_PACKED NCQFrame;
 
-void ahci_init(AHCIState *s, DeviceState *qdev, DMAContext *dma, int ports);
+void ahci_init(AHCIState *s, DeviceState *qdev, AddressSpace *as, int ports);
 void ahci_uninit(AHCIState *s);
 
 void ahci_reset(AHCIState *s);
diff --git a/hw/ide/ich.c b/hw/ide/ich.c
index ed1f1a2..6c0c0c2 100644
--- a/hw/ide/ich.c
+++ b/hw/ide/ich.c
@@ -104,7 +104,7 @@ static int pci_ich9_ahci_init(PCIDevice *dev)
     uint8_t *sata_cap;
     d = DO_UPCAST(struct AHCIPCIState, card, dev);
 
-    ahci_init(&d->ahci, &dev->qdev, pci_dma_context(dev), 6);
+    ahci_init(&d->ahci, &dev->qdev, pci_get_address_space(dev), 6);
 
     pci_config_set_prog_interface(d->card.config, AHCI_PROGMODE_MAJOR_REV_1);
 
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index e1e4f41..a1952b0 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -71,7 +71,7 @@ static void pmac_ide_atapi_transfer_cb(void *opaque, int ret)
     s->io_buffer_size = io->len;
 
     qemu_sglist_init(&s->sg, io->len / MACIO_PAGE_SIZE + 1,
-                     &dma_context_memory);
+                     &address_space_memory);
     qemu_sglist_add(&s->sg, io->addr, io->len);
     io->addr += io->len;
     io->len = 0;
@@ -128,7 +128,7 @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
     s->io_buffer_size = io->len;
 
     qemu_sglist_init(&s->sg, io->len / MACIO_PAGE_SIZE + 1,
-                     &dma_context_memory);
+                     &address_space_memory);
     qemu_sglist_add(&s->sg, io->addr, io->len);
     io->addr += io->len;
     io->len = 0;
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index ed9c8a1..104fe71 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -815,8 +815,6 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
                              dma_as->root, 0, memory_region_size(dma_as->root));
     memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
     address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region);
-    pci_dev->dma = g_new(DMAContext, 1);
-    dma_context_init(pci_dev->dma, &pci_dev->bus_master_as);
 
     pci_dev->devfn = devfn;
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
@@ -873,8 +871,6 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
 
     address_space_destroy(&pci_dev->bus_master_as);
     memory_region_destroy(&pci_dev->bus_master_enable_region);
-    g_free(pci_dev->dma);
-    pci_dev->dma = NULL;
 }
 
 static void pci_unregister_io_regions(PCIDevice *pci_dev)
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 09cda64..459c9f2 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -455,7 +455,6 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
         uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
         dev->tcet = spapr_tce_new_table(liobn, pc->rtce_window_size);
         address_space_init(&dev->as, spapr_tce_get_iommu(dev->tcet));
-        dma_context_init(&dev->dma, &dev->as);
     }
 
     return pc->init(dev);
diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c
index fe6550c..65ccb09 100644
--- a/hw/scsi/megasas.c
+++ b/hw/scsi/megasas.c
@@ -232,7 +232,7 @@ static int megasas_map_sgl(MegasasState *s, MegasasCmd *cmd, union mfi_sgl *sgl)
                                          MEGASAS_MAX_SGE);
         return iov_count;
     }
-    qemu_sglist_init(&cmd->qsg, iov_count, pci_dma_context(&s->dev));
+    pci_dma_sglist_init(&cmd->qsg, &s->dev, iov_count);
     for (i = 0; i < iov_count; i++) {
         dma_addr_t iov_pa, iov_size_p;
 
@@ -628,7 +628,7 @@ static int megasas_map_dcmd(MegasasState *s, MegasasCmd *cmd)
     }
     iov_pa = megasas_sgl_get_addr(cmd, &cmd->frame->dcmd.sgl);
     iov_size = megasas_sgl_get_len(cmd, &cmd->frame->dcmd.sgl);
-    qemu_sglist_init(&cmd->qsg, 1, pci_dma_context(&s->dev));
+    pci_dma_sglist_init(&cmd->qsg, &s->dev, 1);
     qemu_sglist_add(&cmd->qsg, iov_pa, iov_size);
     cmd->iov_size = iov_size;
     return cmd->iov_size;
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index 08dd3f3..b8a0abf 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -80,7 +80,7 @@ static void virtio_scsi_bad_req(void)
 static void qemu_sgl_init_external(QEMUSGList *qsgl, struct iovec *sg,
                                    hwaddr *addr, int num)
 {
-    qemu_sglist_init(qsgl, num, &dma_context_memory);
+    qemu_sglist_init(qsgl, num, &address_space_memory);
     while (num--) {
         qemu_sglist_add(qsgl, *(addr++), (sg++)->iov_len);
     }
diff --git a/hw/scsi/vmw_pvscsi.c b/hw/scsi/vmw_pvscsi.c
index 446f723..7cf4044 100644
--- a/hw/scsi/vmw_pvscsi.c
+++ b/hw/scsi/vmw_pvscsi.c
@@ -617,7 +617,7 @@ pvscsi_build_sglist(PVSCSIState *s, PVSCSIRequest *r)
 {
     PCIDevice *d = PCI_DEVICE(s);
 
-    qemu_sglist_init(&r->sgl, 1, pci_dma_context(d));
+    pci_dma_sglist_init(&r->sgl, d, 1);
     if (r->req.flags & PVSCSI_FLAG_CMD_WITH_SG_LIST) {
         pvscsi_convert_sglist(r);
     } else {
diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index e64899c..0065067 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c
@@ -496,7 +496,7 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s)
                     s->blkcnt--;
                 }
             }
-            dma_memory_write(&dma_context_memory, s->sdmasysad,
+            dma_memory_write(&address_space_memory, s->sdmasysad,
                              &s->fifo_buffer[begin], s->data_count - begin);
             s->sdmasysad += s->data_count - begin;
             if (s->data_count == block_size) {
@@ -518,7 +518,7 @@ static void sdhci_sdma_transfer_multi_blocks(SDHCIState *s)
                 s->data_count = block_size;
                 boundary_count -= block_size - begin;
             }
-            dma_memory_read(&dma_context_memory, s->sdmasysad,
+            dma_memory_read(&address_space_memory, s->sdmasysad,
                             &s->fifo_buffer[begin], s->data_count);
             s->sdmasysad += s->data_count - begin;
             if (s->data_count == block_size) {
@@ -557,10 +557,10 @@ static void sdhci_sdma_transfer_single_block(SDHCIState *s)
         for (n = 0; n < datacnt; n++) {
             s->fifo_buffer[n] = sd_read_data(s->card);
         }
-        dma_memory_write(&dma_context_memory, s->sdmasysad, s->fifo_buffer,
+        dma_memory_write(&address_space_memory, s->sdmasysad, s->fifo_buffer,
                          datacnt);
     } else {
-        dma_memory_read(&dma_context_memory, s->sdmasysad, s->fifo_buffer,
+        dma_memory_read(&address_space_memory, s->sdmasysad, s->fifo_buffer,
                         datacnt);
         for (n = 0; n < datacnt; n++) {
             sd_write_data(s->card, s->fifo_buffer[n]);
@@ -588,7 +588,7 @@ static void get_adma_description(SDHCIState *s, ADMADescr *dscr)
     hwaddr entry_addr = (hwaddr)s->admasysaddr;
     switch (SDHC_DMA_TYPE(s->hostctl)) {
     case SDHC_CTRL_ADMA2_32:
-        dma_memory_read(&dma_context_memory, entry_addr, (uint8_t *)&adma2,
+        dma_memory_read(&address_space_memory, entry_addr, (uint8_t *)&adma2,
                         sizeof(adma2));
         adma2 = le64_to_cpu(adma2);
         /* The spec does not specify endianness of descriptor table.
@@ -600,7 +600,7 @@ static void get_adma_description(SDHCIState *s, ADMADescr *dscr)
         dscr->incr = 8;
         break;
     case SDHC_CTRL_ADMA1_32:
-        dma_memory_read(&dma_context_memory, entry_addr, (uint8_t *)&adma1,
+        dma_memory_read(&address_space_memory, entry_addr, (uint8_t *)&adma1,
                         sizeof(adma1));
         adma1 = le32_to_cpu(adma1);
         dscr->addr = (hwaddr)(adma1 & 0xFFFFF000);
@@ -613,12 +613,12 @@ static void get_adma_description(SDHCIState *s, ADMADescr *dscr)
         }
         break;
     case SDHC_CTRL_ADMA2_64:
-        dma_memory_read(&dma_context_memory, entry_addr,
+        dma_memory_read(&address_space_memory, entry_addr,
                         (uint8_t *)(&dscr->attr), 1);
-        dma_memory_read(&dma_context_memory, entry_addr + 2,
+        dma_memory_read(&address_space_memory, entry_addr + 2,
                         (uint8_t *)(&dscr->length), 2);
         dscr->length = le16_to_cpu(dscr->length);
-        dma_memory_read(&dma_context_memory, entry_addr + 4,
+        dma_memory_read(&address_space_memory, entry_addr + 4,
                         (uint8_t *)(&dscr->addr), 8);
         dscr->attr = le64_to_cpu(dscr->attr);
         dscr->attr &= 0xfffffff8;
@@ -678,7 +678,7 @@ static void sdhci_do_adma(SDHCIState *s)
                         s->data_count = block_size;
                         length -= block_size - begin;
                     }
-                    dma_memory_write(&dma_context_memory, dscr.addr,
+                    dma_memory_write(&address_space_memory, dscr.addr,
                                      &s->fifo_buffer[begin],
                                      s->data_count - begin);
                     dscr.addr += s->data_count - begin;
@@ -702,7 +702,7 @@ static void sdhci_do_adma(SDHCIState *s)
                         s->data_count = block_size;
                         length -= block_size - begin;
                     }
-                    dma_memory_read(&dma_context_memory, dscr.addr,
+                    dma_memory_read(&address_space_memory, dscr.addr,
                                     &s->fifo_buffer[begin], s->data_count);
                     dscr.addr += s->data_count - begin;
                     if (s->data_count == block_size) {
diff --git a/hw/usb/hcd-ehci-pci.c b/hw/usb/hcd-ehci-pci.c
index 0eb7826..f1b5f5d 100644
--- a/hw/usb/hcd-ehci-pci.c
+++ b/hw/usb/hcd-ehci-pci.c
@@ -63,7 +63,7 @@ static int usb_ehci_pci_initfn(PCIDevice *dev)
     s->caps[0x09] = 0x68;        /* EECP */
 
     s->irq = dev->irq[3];
-    s->dma = pci_dma_context(dev);
+    s->as = pci_get_address_space(dev);
 
     s->capsbase = 0x00;
     s->opregbase = 0x20;
@@ -86,7 +86,7 @@ static void usb_ehci_pci_write_config(PCIDevice *dev, uint32_t addr,
         return;
     }
     busmaster = pci_get_word(dev->config + PCI_COMMAND) & PCI_COMMAND_MASTER;
-    i->ehci.dma = busmaster ? pci_dma_context(dev) : NULL;
+    i->ehci.as = busmaster ? pci_get_address_space(dev) : &address_space_memory;
 }
 
 static Property ehci_pci_properties[] = {
diff --git a/hw/usb/hcd-ehci-sysbus.c b/hw/usb/hcd-ehci-sysbus.c
index b68a66a..f9e4fd3 100644
--- a/hw/usb/hcd-ehci-sysbus.c
+++ b/hw/usb/hcd-ehci-sysbus.c
@@ -40,7 +40,7 @@ static int usb_ehci_sysbus_initfn(SysBusDevice *dev)
 
     s->capsbase = sec->capsbase;
     s->opregbase = sec->opregbase;
-    s->dma = &dma_context_memory;
+    s->as = &address_space_memory;
 
     usb_ehci_initfn(s, DEVICE(dev));
     sysbus_init_irq(dev, &s->irq);
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 0d3799d..1ad2159 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -446,7 +446,7 @@ static inline int get_dwords(EHCIState *ehci, uint32_t addr,
 {
     int i;
 
-    if (!ehci->dma) {
+    if (!ehci->as) {
         ehci_raise_irq(ehci, USBSTS_HSE);
         ehci->usbcmd &= ~USBCMD_RUNSTOP;
         trace_usb_ehci_dma_error();
@@ -454,7 +454,7 @@ static inline int get_dwords(EHCIState *ehci, uint32_t addr,
     }
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
-        dma_memory_read(ehci->dma, addr, buf, sizeof(*buf));
+        dma_memory_read(ehci->as, addr, buf, sizeof(*buf));
         *buf = le32_to_cpu(*buf);
     }
 
@@ -467,7 +467,7 @@ static inline int put_dwords(EHCIState *ehci, uint32_t addr,
 {
     int i;
 
-    if (!ehci->dma) {
+    if (!ehci->as) {
         ehci_raise_irq(ehci, USBSTS_HSE);
         ehci->usbcmd &= ~USBCMD_RUNSTOP;
         trace_usb_ehci_dma_error();
@@ -476,7 +476,7 @@ static inline int put_dwords(EHCIState *ehci, uint32_t addr,
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
         uint32_t tmp = cpu_to_le32(*buf);
-        dma_memory_write(ehci->dma, addr, &tmp, sizeof(tmp));
+        dma_memory_write(ehci->as, addr, &tmp, sizeof(tmp));
     }
 
     return num;
@@ -1245,7 +1245,7 @@ static int ehci_init_transfer(EHCIPacket *p)
     cpage  = get_field(p->qtd.token, QTD_TOKEN_CPAGE);
     bytes  = get_field(p->qtd.token, QTD_TOKEN_TBYTES);
     offset = p->qtd.bufptr[0] & ~QTD_BUFPTR_MASK;
-    qemu_sglist_init(&p->sgl, 5, p->queue->ehci->dma);
+    qemu_sglist_init(&p->sgl, 5, p->queue->ehci->as);
 
     while (bytes > 0) {
         if (cpage > 4) {
@@ -1484,7 +1484,7 @@ static int ehci_process_itd(EHCIState *ehci,
                 return -1;
             }
 
-            qemu_sglist_init(&ehci->isgl, 2, ehci->dma);
+            qemu_sglist_init(&ehci->isgl, 2, ehci->as);
             if (off + len > 4096) {
                 /* transfer crosses page border */
                 uint32_t len2 = off + len - 4096;
diff --git a/hw/usb/hcd-ehci.h b/hw/usb/hcd-ehci.h
index e95bb7e..2fcb92f 100644
--- a/hw/usb/hcd-ehci.h
+++ b/hw/usb/hcd-ehci.h
@@ -261,7 +261,7 @@ struct EHCIState {
     USBBus bus;
     qemu_irq irq;
     MemoryRegion mem;
-    DMAContext *dma;
+    AddressSpace *as;
     MemoryRegion mem_caps;
     MemoryRegion mem_opreg;
     MemoryRegion mem_ports;
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index 51241cd..5513924 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -62,7 +62,7 @@ typedef struct {
     USBBus bus;
     qemu_irq irq;
     MemoryRegion mem;
-    DMAContext *dma;
+    AddressSpace *as;
     int num_ports;
     const char *name;
 
@@ -508,7 +508,7 @@ static inline int get_dwords(OHCIState *ohci,
     addr += ohci->localmem_base;
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
-        dma_memory_read(ohci->dma, addr, buf, sizeof(*buf));
+        dma_memory_read(ohci->as, addr, buf, sizeof(*buf));
         *buf = le32_to_cpu(*buf);
     }
 
@@ -525,7 +525,7 @@ static inline int put_dwords(OHCIState *ohci,
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
         uint32_t tmp = cpu_to_le32(*buf);
-        dma_memory_write(ohci->dma, addr, &tmp, sizeof(tmp));
+        dma_memory_write(ohci->as, addr, &tmp, sizeof(tmp));
     }
 
     return 1;
@@ -540,7 +540,7 @@ static inline int get_words(OHCIState *ohci,
     addr += ohci->localmem_base;
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
-        dma_memory_read(ohci->dma, addr, buf, sizeof(*buf));
+        dma_memory_read(ohci->as, addr, buf, sizeof(*buf));
         *buf = le16_to_cpu(*buf);
     }
 
@@ -557,7 +557,7 @@ static inline int put_words(OHCIState *ohci,
 
     for (i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
         uint16_t tmp = cpu_to_le16(*buf);
-        dma_memory_write(ohci->dma, addr, &tmp, sizeof(tmp));
+        dma_memory_write(ohci->as, addr, &tmp, sizeof(tmp));
     }
 
     return 1;
@@ -585,7 +585,7 @@ static inline int ohci_read_iso_td(OHCIState *ohci,
 static inline int ohci_read_hcca(OHCIState *ohci,
                                  dma_addr_t addr, struct ohci_hcca *hcca)
 {
-    dma_memory_read(ohci->dma, addr + ohci->localmem_base, hcca, sizeof(*hcca));
+    dma_memory_read(ohci->as, addr + ohci->localmem_base, hcca, sizeof(*hcca));
     return 1;
 }
 
@@ -617,7 +617,7 @@ static inline int ohci_put_iso_td(OHCIState *ohci,
 static inline int ohci_put_hcca(OHCIState *ohci,
                                 dma_addr_t addr, struct ohci_hcca *hcca)
 {
-    dma_memory_write(ohci->dma,
+    dma_memory_write(ohci->as,
                      addr + ohci->localmem_base + HCCA_WRITEBACK_OFFSET,
                      (char *)hcca + HCCA_WRITEBACK_OFFSET,
                      HCCA_WRITEBACK_SIZE);
@@ -634,12 +634,12 @@ static void ohci_copy_td(OHCIState *ohci, struct ohci_td *td,
     n = 0x1000 - (ptr & 0xfff);
     if (n > len)
         n = len;
-    dma_memory_rw(ohci->dma, ptr + ohci->localmem_base, buf, n, dir);
+    dma_memory_rw(ohci->as, ptr + ohci->localmem_base, buf, n, dir);
     if (n == len)
         return;
     ptr = td->be & ~0xfffu;
     buf += n;
-    dma_memory_rw(ohci->dma, ptr + ohci->localmem_base, buf, len - n, dir);
+    dma_memory_rw(ohci->as, ptr + ohci->localmem_base, buf, len - n, dir);
 }
 
 /* Read/Write the contents of an ISO TD from/to main memory.  */
@@ -653,12 +653,12 @@ static void ohci_copy_iso_td(OHCIState *ohci,
     n = 0x1000 - (ptr & 0xfff);
     if (n > len)
         n = len;
-    dma_memory_rw(ohci->dma, ptr + ohci->localmem_base, buf, n, dir);
+    dma_memory_rw(ohci->as, ptr + ohci->localmem_base, buf, n, dir);
     if (n == len)
         return;
     ptr = end_addr & ~0xfffu;
     buf += n;
-    dma_memory_rw(ohci->dma, ptr + ohci->localmem_base, buf, len - n, dir);
+    dma_memory_rw(ohci->as, ptr + ohci->localmem_base, buf, len - n, dir);
 }
 
 static void ohci_process_lists(OHCIState *ohci, int completion);
@@ -1788,11 +1788,11 @@ static USBBusOps ohci_bus_ops = {
 static int usb_ohci_init(OHCIState *ohci, DeviceState *dev,
                          int num_ports, dma_addr_t localmem_base,
                          char *masterbus, uint32_t firstport,
-                         DMAContext *dma)
+                         AddressSpace *as)
 {
     int i;
 
-    ohci->dma = dma;
+    ohci->as = as;
 
     if (usb_frame_time == 0) {
 #ifdef OHCI_TIME_WARP
@@ -1859,7 +1859,7 @@ static int usb_ohci_initfn_pci(struct PCIDevice *dev)
 
     if (usb_ohci_init(&ohci->state, &dev->qdev, ohci->num_ports, 0,
                       ohci->masterbus, ohci->firstport,
-                      pci_dma_context(dev)) != 0) {
+                      pci_get_address_space(dev)) != 0) {
         return -1;
     }
     ohci->state.irq = ohci->pci_dev.irq[0];
@@ -1882,7 +1882,7 @@ static int ohci_init_pxa(SysBusDevice *dev)
 
     /* Cannot fail as we pass NULL for masterbus */
     usb_ohci_init(&s->ohci, &dev->qdev, s->num_ports, s->dma_offset, NULL, 0,
-                  &dma_context_memory);
+                  &address_space_memory);
     sysbus_init_irq(dev, &s->ohci.irq);
     sysbus_init_mmio(dev, &s->ohci.mem);
 
diff --git a/hw/usb/libhw.c b/hw/usb/libhw.c
index d2d4b51..8df11c4 100644
--- a/hw/usb/libhw.c
+++ b/hw/usb/libhw.c
@@ -37,7 +37,7 @@ int usb_packet_map(USBPacket *p, QEMUSGList *sgl)
 
         while (len) {
             dma_addr_t xlen = len;
-            mem = dma_memory_map(sgl->dma, base, &xlen, dir);
+            mem = dma_memory_map(sgl->as, base, &xlen, dir);
             if (!mem) {
                 goto err;
             }
@@ -63,7 +63,7 @@ void usb_packet_unmap(USBPacket *p, QEMUSGList *sgl)
     int i;
 
     for (i = 0; i < p->iov.niov; i++) {
-        dma_memory_unmap(sgl->dma, p->iov.iov[i].iov_base,
+        dma_memory_unmap(sgl->as, p->iov.iov[i].iov_base,
                          p->iov.iov[i].iov_len, dir,
                          p->iov.iov[i].iov_len);
     }
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 3a85bce..6ef1f97 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -242,7 +242,6 @@ struct PCIDevice {
     PCIIORegion io_regions[PCI_NUM_REGIONS];
     AddressSpace bus_master_as;
     MemoryRegion bus_master_enable_region;
-    DMAContext *dma;
 
     /* do not access the following fields */
     PCIConfigReadFunc *config_read;
@@ -639,15 +638,15 @@ static inline uint32_t pci_config_size(const PCIDevice *d)
 }
 
 /* DMA access functions */
-static inline DMAContext *pci_dma_context(PCIDevice *dev)
+static inline AddressSpace *pci_get_address_space(PCIDevice *dev)
 {
-    return dev->dma;
+    return &dev->bus_master_as;
 }
 
 static inline int pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
                              void *buf, dma_addr_t len, DMADirection dir)
 {
-    dma_memory_rw(pci_dma_context(dev), addr, buf, len, dir);
+    dma_memory_rw(pci_get_address_space(dev), addr, buf, len, dir);
     return 0;
 }
 
@@ -667,12 +666,12 @@ static inline int pci_dma_write(PCIDevice *dev, dma_addr_t addr,
     static inline uint##_bits##_t ld##_l##_pci_dma(PCIDevice *dev,      \
                                                    dma_addr_t addr)     \
     {                                                                   \
-        return ld##_l##_dma(pci_dma_context(dev), addr);                \
+        return ld##_l##_dma(pci_get_address_space(dev), addr);          \
     }                                                                   \
     static inline void st##_s##_pci_dma(PCIDevice *dev,                 \
                                         dma_addr_t addr, uint##_bits##_t val) \
     {                                                                   \
-        st##_s##_dma(pci_dma_context(dev), addr, val);                  \
+        st##_s##_dma(pci_get_address_space(dev), addr, val);            \
     }
 
 PCI_DMA_DEFINE_LDST(ub, b, 8);
@@ -690,20 +689,20 @@ static inline void *pci_dma_map(PCIDevice *dev, dma_addr_t addr,
 {
     void *buf;
 
-    buf = dma_memory_map(pci_dma_context(dev), addr, plen, dir);
+    buf = dma_memory_map(pci_get_address_space(dev), addr, plen, dir);
     return buf;
 }
 
 static inline void pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len,
                                  DMADirection dir, dma_addr_t access_len)
 {
-    dma_memory_unmap(pci_dma_context(dev), buffer, len, dir, access_len);
+    dma_memory_unmap(pci_get_address_space(dev), buffer, len, dir, access_len);
 }
 
 static inline void pci_dma_sglist_init(QEMUSGList *qsg, PCIDevice *dev,
                                        int alloc_hint)
 {
-    qemu_sglist_init(qsg, alloc_hint, pci_dma_context(dev));
+    qemu_sglist_init(qsg, alloc_hint, pci_get_address_space(dev));
 }
 
 extern const VMStateDescription vmstate_pci_device;
diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h
index 2de58f1..3609327 100644
--- a/include/hw/ppc/spapr_vio.h
+++ b/include/hw/ppc/spapr_vio.h
@@ -64,7 +64,6 @@ struct VIOsPAPRDevice {
     target_ulong signal_state;
     VIOsPAPR_CRQ crq;
     AddressSpace as;
-    DMAContext dma;
     sPAPRTCETable *tcet;
 };
 
@@ -93,35 +92,35 @@ static inline qemu_irq spapr_vio_qirq(VIOsPAPRDevice *dev)
 static inline bool spapr_vio_dma_valid(VIOsPAPRDevice *dev, uint64_t taddr,
                                        uint32_t size, DMADirection dir)
 {
-    return dma_memory_valid(&dev->dma, taddr, size, dir);
+    return dma_memory_valid(&dev->as, taddr, size, dir);
 }
 
 static inline int spapr_vio_dma_read(VIOsPAPRDevice *dev, uint64_t taddr,
                                      void *buf, uint32_t size)
 {
-    return (dma_memory_read(&dev->dma, taddr, buf, size) != 0) ?
+    return (dma_memory_read(&dev->as, taddr, buf, size) != 0) ?
         H_DEST_PARM : H_SUCCESS;
 }
 
 static inline int spapr_vio_dma_write(VIOsPAPRDevice *dev, uint64_t taddr,
                                       const void *buf, uint32_t size)
 {
-    return (dma_memory_write(&dev->dma, taddr, buf, size) != 0) ?
+    return (dma_memory_write(&dev->as, taddr, buf, size) != 0) ?
         H_DEST_PARM : H_SUCCESS;
 }
 
 static inline int spapr_vio_dma_set(VIOsPAPRDevice *dev, uint64_t taddr,
                                     uint8_t c, uint32_t size)
 {
-    return (dma_memory_set(&dev->dma, taddr, c, size) != 0) ?
+    return (dma_memory_set(&dev->as, taddr, c, size) != 0) ?
         H_DEST_PARM : H_SUCCESS;
 }
 
-#define vio_stb(_dev, _addr, _val) (stb_dma(&(_dev)->dma, (_addr), (_val)))
-#define vio_sth(_dev, _addr, _val) (stw_be_dma(&(_dev)->dma, (_addr), (_val)))
-#define vio_stl(_dev, _addr, _val) (stl_be_dma(&(_dev)->dma, (_addr), (_val)))
-#define vio_stq(_dev, _addr, _val) (stq_be_dma(&(_dev)->dma, (_addr), (_val)))
-#define vio_ldq(_dev, _addr) (ldq_be_dma(&(_dev)->dma, (_addr)))
+#define vio_stb(_dev, _addr, _val) (stb_dma(&(_dev)->as, (_addr), (_val)))
+#define vio_sth(_dev, _addr, _val) (stw_be_dma(&(_dev)->as, (_addr), (_val)))
+#define vio_stl(_dev, _addr, _val) (stl_be_dma(&(_dev)->as, (_addr), (_val)))
+#define vio_stq(_dev, _addr, _val) (stq_be_dma(&(_dev)->as, (_addr), (_val)))
+#define vio_ldq(_dev, _addr) (ldq_be_dma(&(_dev)->as, (_addr)))
 
 int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq);
 
diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index 3317dcf..031d1f5 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -12,11 +12,11 @@
 
 #include <stdio.h>
 #include "exec/memory.h"
+#include "exec/address-spaces.h"
 #include "hw/hw.h"
 #include "block/block.h"
 #include "sysemu/kvm.h"
 
-typedef struct DMAContext DMAContext;
 typedef struct ScatterGatherEntry ScatterGatherEntry;
 
 typedef enum {
@@ -29,7 +29,7 @@ struct QEMUSGList {
     int nsg;
     int nalloc;
     size_t size;
-    DMAContext *dma;
+    AddressSpace *as;
 };
 
 #ifndef CONFIG_USER_ONLY
@@ -46,16 +46,7 @@ typedef uint64_t dma_addr_t;
 #define DMA_ADDR_BITS 64
 #define DMA_ADDR_FMT "%" PRIx64
 
-struct DMAContext {
-    AddressSpace *as;
-};
-
-/* A global DMA context corresponding to the address_space_memory
- * AddressSpace, for sysbus devices which do DMA.
- */
-extern DMAContext dma_context_memory;
-
-static inline void dma_barrier(DMAContext *dma, DMADirection dir)
+static inline void dma_barrier(AddressSpace *as, DMADirection dir)
 {
     /*
      * This is called before DMA read and write operations
@@ -83,105 +74,105 @@ static inline void dma_barrier(DMAContext *dma, DMADirection dir)
 /* Checks that the given range of addresses is valid for DMA.  This is
  * useful for certain cases, but usually you should just use
  * dma_memory_{read,write}() and check for errors */
-static inline bool dma_memory_valid(DMAContext *dma,
+static inline bool dma_memory_valid(AddressSpace *as,
                                     dma_addr_t addr, dma_addr_t len,
                                     DMADirection dir)
 {
-    return address_space_access_valid(dma->as, addr, len,
+    return address_space_access_valid(as, addr, len,
                                       dir == DMA_DIRECTION_FROM_DEVICE);
 }
 
-static inline int dma_memory_rw_relaxed(DMAContext *dma, dma_addr_t addr,
+static inline int dma_memory_rw_relaxed(AddressSpace *as, dma_addr_t addr,
                                         void *buf, dma_addr_t len,
                                         DMADirection dir)
 {
-    return address_space_rw(dma->as, addr, buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
+    return address_space_rw(as, addr, buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
 }
 
-static inline int dma_memory_read_relaxed(DMAContext *dma, dma_addr_t addr,
+static inline int dma_memory_read_relaxed(AddressSpace *as, dma_addr_t addr,
                                           void *buf, dma_addr_t len)
 {
-    return dma_memory_rw_relaxed(dma, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
+    return dma_memory_rw_relaxed(as, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
 }
 
-static inline int dma_memory_write_relaxed(DMAContext *dma, dma_addr_t addr,
+static inline int dma_memory_write_relaxed(AddressSpace *as, dma_addr_t addr,
                                            const void *buf, dma_addr_t len)
 {
-    return dma_memory_rw_relaxed(dma, addr, (void *)buf, len,
+    return dma_memory_rw_relaxed(as, addr, (void *)buf, len,
                                  DMA_DIRECTION_FROM_DEVICE);
 }
 
-static inline int dma_memory_rw(DMAContext *dma, dma_addr_t addr,
+static inline int dma_memory_rw(AddressSpace *as, dma_addr_t addr,
                                 void *buf, dma_addr_t len,
                                 DMADirection dir)
 {
-    dma_barrier(dma, dir);
+    dma_barrier(as, dir);
 
-    return dma_memory_rw_relaxed(dma, addr, buf, len, dir);
+    return dma_memory_rw_relaxed(as, addr, buf, len, dir);
 }
 
-static inline int dma_memory_read(DMAContext *dma, dma_addr_t addr,
+static inline int dma_memory_read(AddressSpace *as, dma_addr_t addr,
                                   void *buf, dma_addr_t len)
 {
-    return dma_memory_rw(dma, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
+    return dma_memory_rw(as, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
 }
 
-static inline int dma_memory_write(DMAContext *dma, dma_addr_t addr,
+static inline int dma_memory_write(AddressSpace *as, dma_addr_t addr,
                                    const void *buf, dma_addr_t len)
 {
-    return dma_memory_rw(dma, addr, (void *)buf, len,
+    return dma_memory_rw(as, addr, (void *)buf, len,
                          DMA_DIRECTION_FROM_DEVICE);
 }
 
-int dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c, dma_addr_t len);
+int dma_memory_set(AddressSpace *as, dma_addr_t addr, uint8_t c, dma_addr_t len);
 
-static inline void *dma_memory_map(DMAContext *dma,
+static inline void *dma_memory_map(AddressSpace *as,
                                    dma_addr_t addr, dma_addr_t *len,
                                    DMADirection dir)
 {
     hwaddr xlen = *len;
     void *p;
 
-    p = address_space_map(dma->as, addr, &xlen, dir == DMA_DIRECTION_FROM_DEVICE);
+    p = address_space_map(as, addr, &xlen, dir == DMA_DIRECTION_FROM_DEVICE);
     *len = xlen;
     return p;
 }
 
-static inline void dma_memory_unmap(DMAContext *dma,
+static inline void dma_memory_unmap(AddressSpace *as,
                                     void *buffer, dma_addr_t len,
                                     DMADirection dir, dma_addr_t access_len)
 {
-    address_space_unmap(dma->as, buffer, (hwaddr)len,
+    address_space_unmap(as, buffer, (hwaddr)len,
                         dir == DMA_DIRECTION_FROM_DEVICE, access_len);
 }
 
 #define DEFINE_LDST_DMA(_lname, _sname, _bits, _end) \
-    static inline uint##_bits##_t ld##_lname##_##_end##_dma(DMAContext *dma, \
+    static inline uint##_bits##_t ld##_lname##_##_end##_dma(AddressSpace *as, \
                                                             dma_addr_t addr) \
     {                                                                   \
         uint##_bits##_t val;                                            \
-        dma_memory_read(dma, addr, &val, (_bits) / 8);                  \
+        dma_memory_read(as, addr, &val, (_bits) / 8);                   \
         return _end##_bits##_to_cpu(val);                               \
     }                                                                   \
-    static inline void st##_sname##_##_end##_dma(DMAContext *dma,       \
+    static inline void st##_sname##_##_end##_dma(AddressSpace *as,      \
                                                  dma_addr_t addr,       \
                                                  uint##_bits##_t val)   \
     {                                                                   \
         val = cpu_to_##_end##_bits(val);                                \
-        dma_memory_write(dma, addr, &val, (_bits) / 8);                 \
+        dma_memory_write(as, addr, &val, (_bits) / 8);                  \
     }
 
-static inline uint8_t ldub_dma(DMAContext *dma, dma_addr_t addr)
+static inline uint8_t ldub_dma(AddressSpace *as, dma_addr_t addr)
 {
     uint8_t val;
 
-    dma_memory_read(dma, addr, &val, 1);
+    dma_memory_read(as, addr, &val, 1);
     return val;
 }
 
-static inline void stb_dma(DMAContext *dma, dma_addr_t addr, uint8_t val)
+static inline void stb_dma(AddressSpace *as, dma_addr_t addr, uint8_t val)
 {
-    dma_memory_write(dma, addr, &val, 1);
+    dma_memory_write(as, addr, &val, 1);
 }
 
 DEFINE_LDST_DMA(uw, w, 16, le);
@@ -193,14 +184,12 @@ DEFINE_LDST_DMA(q, q, 64, be);
 
 #undef DEFINE_LDST_DMA
 
-void dma_context_init(DMAContext *dma, AddressSpace *as);
-
 struct ScatterGatherEntry {
     dma_addr_t base;
     dma_addr_t len;
 };
 
-void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma);
+void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, AddressSpace *as);
 void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len);
 void qemu_sglist_destroy(QEMUSGList *qsg);
 #endif
commit 96478592a93f93322ecc20d0a6eccb4d4ef33c7a
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Apr 11 12:38:50 2013 +0200

    spapr_vio: take care of creating our own AddressSpace/DMAContext
    
    Fetch the root region from the sPAPRTCETable, and use it to build
    an AddressSpace and DMAContext.
    
    Now, everywhere we have a DMAContext we also have access to the
    corresponding AddressSpace (either because we create it just before
    the DMAContext, or because dma_context_memory's AddressSpace is
    trivially address_space_memory).
    
    Acked-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 4667e11..91bc8e4 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -37,10 +37,6 @@ enum sPAPRTCEAccess {
 };
 
 struct sPAPRTCETable {
-    /* temporary until everyone has its own AddressSpace */
-    DMAContext dma;
-    AddressSpace as;
-
     uint32_t liobn;
     uint32_t window_size;
     sPAPRTCE *table;
@@ -157,8 +153,6 @@ sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size)
 
     memory_region_init_iommu(&tcet->iommu, &spapr_iommu_ops,
                              "iommu-spapr", UINT64_MAX);
-    address_space_init(&tcet->as, &tcet->iommu);
-    dma_context_init(&tcet->dma, &tcet->as);
 
     QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
 
@@ -178,11 +172,6 @@ void spapr_tce_free(sPAPRTCETable *tcet)
     g_free(tcet);
 }
 
-DMAContext *spapr_tce_get_dma(sPAPRTCETable *tcet)
-{
-    return &tcet->dma;
-}
-
 MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet)
 {
     return &tcet->iommu;
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index ae906a7..09cda64 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -454,7 +454,8 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
     if (pc->rtce_window_size) {
         uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
         dev->tcet = spapr_tce_new_table(liobn, pc->rtce_window_size);
-        dev->dma = spapr_tce_get_dma(dev->tcet);
+        address_space_init(&dev->as, spapr_tce_get_iommu(dev->tcet));
+        dma_context_init(&dev->dma, &dev->as);
     }
 
     return pc->init(dev);
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 142abb7..a83720e 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -348,7 +348,6 @@ void spapr_iommu_init(void);
 void spapr_events_init(sPAPREnvironment *spapr);
 void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
 sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size);
-DMAContext *spapr_tce_get_dma(sPAPRTCETable *tcet);
 MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet);
 void spapr_tce_free(sPAPRTCETable *tcet);
 void spapr_tce_reset(sPAPRTCETable *tcet);
diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h
index 56f2821..2de58f1 100644
--- a/include/hw/ppc/spapr_vio.h
+++ b/include/hw/ppc/spapr_vio.h
@@ -63,8 +63,9 @@ struct VIOsPAPRDevice {
     uint32_t irq;
     target_ulong signal_state;
     VIOsPAPR_CRQ crq;
+    AddressSpace as;
+    DMAContext dma;
     sPAPRTCETable *tcet;
-    DMAContext *dma;
 };
 
 #define DEFINE_SPAPR_PROPERTIES(type, field)           \
@@ -92,35 +93,35 @@ static inline qemu_irq spapr_vio_qirq(VIOsPAPRDevice *dev)
 static inline bool spapr_vio_dma_valid(VIOsPAPRDevice *dev, uint64_t taddr,
                                        uint32_t size, DMADirection dir)
 {
-    return dma_memory_valid(dev->dma, taddr, size, dir);
+    return dma_memory_valid(&dev->dma, taddr, size, dir);
 }
 
 static inline int spapr_vio_dma_read(VIOsPAPRDevice *dev, uint64_t taddr,
                                      void *buf, uint32_t size)
 {
-    return (dma_memory_read(dev->dma, taddr, buf, size) != 0) ?
+    return (dma_memory_read(&dev->dma, taddr, buf, size) != 0) ?
         H_DEST_PARM : H_SUCCESS;
 }
 
 static inline int spapr_vio_dma_write(VIOsPAPRDevice *dev, uint64_t taddr,
                                       const void *buf, uint32_t size)
 {
-    return (dma_memory_write(dev->dma, taddr, buf, size) != 0) ?
+    return (dma_memory_write(&dev->dma, taddr, buf, size) != 0) ?
         H_DEST_PARM : H_SUCCESS;
 }
 
 static inline int spapr_vio_dma_set(VIOsPAPRDevice *dev, uint64_t taddr,
                                     uint8_t c, uint32_t size)
 {
-    return (dma_memory_set(dev->dma, taddr, c, size) != 0) ?
+    return (dma_memory_set(&dev->dma, taddr, c, size) != 0) ?
         H_DEST_PARM : H_SUCCESS;
 }
 
-#define vio_stb(_dev, _addr, _val) (stb_dma((_dev)->dma, (_addr), (_val)))
-#define vio_sth(_dev, _addr, _val) (stw_be_dma((_dev)->dma, (_addr), (_val)))
-#define vio_stl(_dev, _addr, _val) (stl_be_dma((_dev)->dma, (_addr), (_val)))
-#define vio_stq(_dev, _addr, _val) (stq_be_dma((_dev)->dma, (_addr), (_val)))
-#define vio_ldq(_dev, _addr) (ldq_be_dma((_dev)->dma, (_addr)))
+#define vio_stb(_dev, _addr, _val) (stb_dma(&(_dev)->dma, (_addr), (_val)))
+#define vio_sth(_dev, _addr, _val) (stw_be_dma(&(_dev)->dma, (_addr), (_val)))
+#define vio_stl(_dev, _addr, _val) (stl_be_dma(&(_dev)->dma, (_addr), (_val)))
+#define vio_stq(_dev, _addr, _val) (stq_be_dma(&(_dev)->dma, (_addr), (_val)))
+#define vio_ldq(_dev, _addr) (ldq_be_dma(&(_dev)->dma, (_addr)))
 
 int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq);
 
commit e00387d58243d4ae24ac68008a2aea76313ab997
Author: Avi Kivity <avi.kivity at gmail.com>
Date:   Tue Oct 30 13:47:48 2012 +0200

    pci: use memory core for iommu support
    
    Use the new iommu support in the memory core for iommu support.  The only
    user, spapr, is also converted, but it still provides a DMAContext
    interface until the non-PCI bits switch to AddressSpace.
    
    Reviewed-by: Michael S. Tsirkin <mst at redhat.com>
    Signed-off-by: Avi Kivity <avi.kivity at gmail.com>
    [ Do not calls memory_region_del_subregion() on the device's
      bus_master_enable_region, it is an alias; return an AddressSpace
      from the IOMMU hook and remove the destructor hook. - David Gibson ]
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 3bcd07d..ed9c8a1 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -786,6 +786,7 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
     PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
     PCIConfigReadFunc *config_read = pc->config_read;
     PCIConfigWriteFunc *config_write = pc->config_write;
+    AddressSpace *dma_as;
 
     if (devfn < 0) {
         for(devfn = bus->devfn_min ; devfn < ARRAY_SIZE(bus->devices);
@@ -801,22 +802,22 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
                      PCI_SLOT(devfn), PCI_FUNC(devfn), name, bus->devices[devfn]->name);
         return NULL;
     }
+
     pci_dev->bus = bus;
-    if (bus->dma_context_fn) {
-        pci_dev->dma = bus->dma_context_fn(bus, bus->dma_context_opaque, devfn);
+    if (bus->iommu_fn) {
+        dma_as = bus->iommu_fn(bus, bus->iommu_opaque, devfn);
     } else {
-        /* FIXME: Make dma_context_fn use MemoryRegions instead, so this path is
-         * taken unconditionally */
         /* FIXME: inherit memory region from bus creator */
-        memory_region_init_alias(&pci_dev->bus_master_enable_region, "bus master",
-                                 get_system_memory(), 0,
-                                 memory_region_size(get_system_memory()));
-        memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
-        address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region);
-        pci_dev->dma = g_new(DMAContext, 1);
-        dma_context_init(pci_dev->dma, &pci_dev->bus_master_as);
+        dma_as = &address_space_memory;
     }
 
+    memory_region_init_alias(&pci_dev->bus_master_enable_region, "bus master",
+                             dma_as->root, 0, memory_region_size(dma_as->root));
+    memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
+    address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region);
+    pci_dev->dma = g_new(DMAContext, 1);
+    dma_context_init(pci_dev->dma, &pci_dev->bus_master_as);
+
     pci_dev->devfn = devfn;
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     pci_dev->irq_state = 0;
@@ -870,12 +871,10 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
     pci_dev->bus->devices[pci_dev->devfn] = NULL;
     pci_config_free(pci_dev);
 
-    if (!pci_dev->bus->dma_context_fn) {
-        address_space_destroy(&pci_dev->bus_master_as);
-        memory_region_destroy(&pci_dev->bus_master_enable_region);
-        g_free(pci_dev->dma);
-        pci_dev->dma = NULL;
-    }
+    address_space_destroy(&pci_dev->bus_master_as);
+    memory_region_destroy(&pci_dev->bus_master_enable_region);
+    g_free(pci_dev->dma);
+    pci_dev->dma = NULL;
 }
 
 static void pci_unregister_io_regions(PCIDevice *pci_dev)
@@ -2232,10 +2231,10 @@ static void pci_device_class_init(ObjectClass *klass, void *data)
     k->props = pci_props;
 }
 
-void pci_setup_iommu(PCIBus *bus, PCIDMAContextFunc fn, void *opaque)
+void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque)
 {
-    bus->dma_context_fn = fn;
-    bus->dma_context_opaque = opaque;
+    bus->iommu_fn = fn;
+    bus->iommu_opaque = opaque;
 }
 
 static const TypeInfo pci_device_type_info = {
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index eb64a8f..459398c 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -506,12 +506,11 @@ static const MemoryRegionOps spapr_msi_ops = {
 /*
  * PHB PCI device
  */
-static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus, void *opaque,
-                                            int devfn)
+static AddressSpace *spapr_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
 {
     sPAPRPHBState *phb = opaque;
 
-    return spapr_tce_get_dma(phb->tcet);
+    return &phb->iommu_as;
 }
 
 static int spapr_phb_init(SysBusDevice *s)
@@ -651,7 +650,8 @@ static int spapr_phb_init(SysBusDevice *s)
         fprintf(stderr, "Unable to create TCE table for %s\n", sphb->dtbusname);
         return -1;
     }
-    pci_setup_iommu(bus, spapr_pci_dma_context_fn, sphb);
+    address_space_init(&sphb->iommu_as, spapr_tce_get_iommu(sphb->tcet));
+    pci_setup_iommu(bus, spapr_pci_dma_iommu, sphb);
 
     QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
 
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 653dd40..1e23dbf 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -50,6 +50,7 @@ typedef struct sPAPRPHBState {
     uint64_t dma_window_start;
     uint64_t dma_window_size;
     sPAPRTCETable *tcet;
+    AddressSpace iommu_as;
 
     struct {
         uint32_t irq;
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 8d075ab..3a85bce 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -401,9 +401,9 @@ int pci_read_devaddr(Monitor *mon, const char *addr, int *domp, int *busp,
 
 void pci_device_deassert_intx(PCIDevice *dev);
 
-typedef DMAContext *(*PCIDMAContextFunc)(PCIBus *, void *, int);
+typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int);
 
-void pci_setup_iommu(PCIBus *bus, PCIDMAContextFunc fn, void *opaque);
+void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
 
 static inline void
 pci_set_byte(uint8_t *config, uint8_t val)
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 6ee443c..66762f6 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -10,8 +10,8 @@
 
 struct PCIBus {
     BusState qbus;
-    PCIDMAContextFunc dma_context_fn;
-    void *dma_context_opaque;
+    PCIIOMMUFunc iommu_fn;
+    void *iommu_opaque;
     uint8_t devfn_min;
     pci_set_irq_fn set_irq;
     pci_map_irq_fn map_irq;
commit 24addbc76dcbb1d1c85b3062bbf7a030831cc7a9
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Apr 10 17:49:04 2013 +0200

    dma: eliminate old-style IOMMU support
    
    The translate function in the DMAContext is now always NULL.
    Remove every reference to it.
    
    Reviewed-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/dma-helpers.c b/dma-helpers.c
index 2e298b6..c53705a 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -14,32 +14,26 @@
 
 /* #define DEBUG_IOMMU */
 
-static void do_dma_memory_set(AddressSpace *as,
-                              dma_addr_t addr, uint8_t c, dma_addr_t len)
+int dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c, dma_addr_t len)
 {
+    AddressSpace *as = dma->as;
+
+    dma_barrier(dma, DMA_DIRECTION_FROM_DEVICE);
+
 #define FILLBUF_SIZE 512
     uint8_t fillbuf[FILLBUF_SIZE];
     int l;
+    bool error = false;
 
     memset(fillbuf, c, FILLBUF_SIZE);
     while (len > 0) {
         l = len < FILLBUF_SIZE ? len : FILLBUF_SIZE;
-        address_space_rw(as, addr, fillbuf, l, true);
+        error |= address_space_rw(as, addr, fillbuf, l, true);
         len -= l;
         addr += l;
     }
-}
 
-int dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c, dma_addr_t len)
-{
-    dma_barrier(dma, DMA_DIRECTION_FROM_DEVICE);
-
-    if (dma_has_iommu(dma)) {
-        return iommu_dma_memory_set(dma, addr, c, len);
-    }
-    do_dma_memory_set(dma->as, addr, c, len);
-
-    return 0;
+    return error;
 }
 
 void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint, DMAContext *dma)
@@ -278,162 +272,10 @@ void dma_acct_start(BlockDriverState *bs, BlockAcctCookie *cookie,
     bdrv_acct_start(bs, cookie, sg->size, type);
 }
 
-bool iommu_dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
-                            DMADirection dir)
+void dma_context_init(DMAContext *dma, AddressSpace *as)
 {
-    hwaddr paddr, plen;
-
 #ifdef DEBUG_IOMMU
-    fprintf(stderr, "dma_memory_check context=%p addr=0x" DMA_ADDR_FMT
-            " len=0x" DMA_ADDR_FMT " dir=%d\n", dma, addr, len, dir);
-#endif
-
-    while (len) {
-        if (dma->translate(dma, addr, &paddr, &plen, dir) != 0) {
-            return false;
-        }
-
-        /* The translation might be valid for larger regions. */
-        if (plen > len) {
-            plen = len;
-        }
-
-        if (!address_space_access_valid(dma->as, paddr, len,
-                                        dir == DMA_DIRECTION_FROM_DEVICE)) {
-            return false;
-        }
-
-        len -= plen;
-        addr += plen;
-    }
-
-    return true;
-}
-
-int iommu_dma_memory_rw(DMAContext *dma, dma_addr_t addr,
-                        void *buf, dma_addr_t len, DMADirection dir)
-{
-    hwaddr paddr, plen;
-    int err;
-
-#ifdef DEBUG_IOMMU
-    fprintf(stderr, "dma_memory_rw context=%p addr=0x" DMA_ADDR_FMT " len=0x"
-            DMA_ADDR_FMT " dir=%d\n", dma, addr, len, dir);
-#endif
-
-    while (len) {
-        err = dma->translate(dma, addr, &paddr, &plen, dir);
-        if (err) {
-	    /*
-             * In case of failure on reads from the guest, we clean the
-             * destination buffer so that a device that doesn't test
-             * for errors will not expose qemu internal memory.
-	     */
-	    memset(buf, 0, len);
-            return -1;
-        }
-
-        /* The translation might be valid for larger regions. */
-        if (plen > len) {
-            plen = len;
-        }
-
-        address_space_rw(dma->as, paddr, buf, plen, dir == DMA_DIRECTION_FROM_DEVICE);
-
-        len -= plen;
-        addr += plen;
-        buf += plen;
-    }
-
-    return 0;
-}
-
-int iommu_dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c,
-                         dma_addr_t len)
-{
-    hwaddr paddr, plen;
-    int err;
-
-#ifdef DEBUG_IOMMU
-    fprintf(stderr, "dma_memory_set context=%p addr=0x" DMA_ADDR_FMT
-            " len=0x" DMA_ADDR_FMT "\n", dma, addr, len);
-#endif
-
-    while (len) {
-        err = dma->translate(dma, addr, &paddr, &plen,
-                             DMA_DIRECTION_FROM_DEVICE);
-        if (err) {
-            return err;
-        }
-
-        /* The translation might be valid for larger regions. */
-        if (plen > len) {
-            plen = len;
-        }
-
-        do_dma_memory_set(dma->as, paddr, c, plen);
-
-        len -= plen;
-        addr += plen;
-    }
-
-    return 0;
-}
-
-void dma_context_init(DMAContext *dma, AddressSpace *as, DMATranslateFunc translate,
-                      DMAMapFunc map, DMAUnmapFunc unmap)
-{
-#ifdef DEBUG_IOMMU
-    fprintf(stderr, "dma_context_init(%p, %p, %p, %p)\n",
-            dma, translate, map, unmap);
+    fprintf(stderr, "dma_context_init(%p -> %p)\n", dma, as);
 #endif
     dma->as = as;
-    dma->translate = translate;
-    dma->map = map;
-    dma->unmap = unmap;
-}
-
-void *iommu_dma_memory_map(DMAContext *dma, dma_addr_t addr, dma_addr_t *len,
-                           DMADirection dir)
-{
-    int err;
-    hwaddr paddr, plen;
-    void *buf;
-
-    if (dma->map) {
-        return dma->map(dma, addr, len, dir);
-    }
-
-    plen = *len;
-    err = dma->translate(dma, addr, &paddr, &plen, dir);
-    if (err) {
-        return NULL;
-    }
-
-    /*
-     * If this is true, the virtual region is contiguous,
-     * but the translated physical region isn't. We just
-     * clamp *len, much like address_space_map() does.
-     */
-    if (plen < *len) {
-        *len = plen;
-    }
-
-    buf = address_space_map(dma->as, paddr, &plen, dir == DMA_DIRECTION_FROM_DEVICE);
-    *len = plen;
-
-    return buf;
-}
-
-void iommu_dma_memory_unmap(DMAContext *dma, void *buffer, dma_addr_t len,
-                            DMADirection dir, dma_addr_t access_len)
-{
-    if (dma->unmap) {
-        dma->unmap(dma, buffer, len, dir, access_len);
-        return;
-    }
-
-    address_space_unmap(dma->as, buffer, len, dir == DMA_DIRECTION_FROM_DEVICE,
-                        access_len);
-
 }
diff --git a/exec.c b/exec.c
index dfddd85..89259c9 100644
--- a/exec.c
+++ b/exec.c
@@ -1840,8 +1840,7 @@ static void memory_map_init(void)
     memory_listener_register(&io_memory_listener, &address_space_io);
     memory_listener_register(&tcg_memory_listener, &address_space_memory);
 
-    dma_context_init(&dma_context_memory, &address_space_memory,
-                     NULL, NULL, NULL);
+    dma_context_init(&dma_context_memory, &address_space_memory);
 }
 
 MemoryRegion *get_system_memory(void)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index a3eb19e..3bcd07d 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -814,8 +814,9 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
         memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
         address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region);
         pci_dev->dma = g_new(DMAContext, 1);
-        dma_context_init(pci_dev->dma, &pci_dev->bus_master_as, NULL, NULL, NULL);
+        dma_context_init(pci_dev->dma, &pci_dev->bus_master_as);
     }
+
     pci_dev->devfn = devfn;
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
     pci_dev->irq_state = 0;
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 6e33929..4667e11 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -158,7 +158,7 @@ sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size)
     memory_region_init_iommu(&tcet->iommu, &spapr_iommu_ops,
                              "iommu-spapr", UINT64_MAX);
     address_space_init(&tcet->as, &tcet->iommu);
-    dma_context_init(&tcet->dma, &tcet->as, NULL, NULL, NULL);
+    dma_context_init(&tcet->dma, &tcet->as);
 
     QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
 
diff --git a/include/sysemu/dma.h b/include/sysemu/dma.h
index 02e0dcd..3317dcf 100644
--- a/include/sysemu/dma.h
+++ b/include/sysemu/dma.h
@@ -46,26 +46,8 @@ typedef uint64_t dma_addr_t;
 #define DMA_ADDR_BITS 64
 #define DMA_ADDR_FMT "%" PRIx64
 
-typedef int DMATranslateFunc(DMAContext *dma,
-                             dma_addr_t addr,
-                             hwaddr *paddr,
-                             hwaddr *len,
-                             DMADirection dir);
-typedef void* DMAMapFunc(DMAContext *dma,
-                         dma_addr_t addr,
-                         dma_addr_t *len,
-                         DMADirection dir);
-typedef void DMAUnmapFunc(DMAContext *dma,
-                          void *buffer,
-                          dma_addr_t len,
-                          DMADirection dir,
-                          dma_addr_t access_len);
-
 struct DMAContext {
     AddressSpace *as;
-    DMATranslateFunc *translate;
-    DMAMapFunc *map;
-    DMAUnmapFunc *unmap;
 };
 
 /* A global DMA context corresponding to the address_space_memory
@@ -98,41 +80,22 @@ static inline void dma_barrier(DMAContext *dma, DMADirection dir)
     }
 }
 
-static inline bool dma_has_iommu(DMAContext *dma)
-{
-    return dma && dma->translate;
-}
-
 /* Checks that the given range of addresses is valid for DMA.  This is
  * useful for certain cases, but usually you should just use
  * dma_memory_{read,write}() and check for errors */
-bool iommu_dma_memory_valid(DMAContext *dma, dma_addr_t addr, dma_addr_t len,
-                            DMADirection dir);
 static inline bool dma_memory_valid(DMAContext *dma,
                                     dma_addr_t addr, dma_addr_t len,
                                     DMADirection dir)
 {
-    if (!dma_has_iommu(dma)) {
-        return address_space_access_valid(dma->as, addr, len,
-                                          dir == DMA_DIRECTION_FROM_DEVICE);
-    } else {
-        return iommu_dma_memory_valid(dma, addr, len, dir);
-    }
+    return address_space_access_valid(dma->as, addr, len,
+                                      dir == DMA_DIRECTION_FROM_DEVICE);
 }
 
-int iommu_dma_memory_rw(DMAContext *dma, dma_addr_t addr,
-                        void *buf, dma_addr_t len, DMADirection dir);
 static inline int dma_memory_rw_relaxed(DMAContext *dma, dma_addr_t addr,
                                         void *buf, dma_addr_t len,
                                         DMADirection dir)
 {
-    if (!dma_has_iommu(dma)) {
-        /* Fast-path for no IOMMU */
-        address_space_rw(dma->as, addr, buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
-        return 0;
-    } else {
-        return iommu_dma_memory_rw(dma, addr, buf, len, dir);
-    }
+    return address_space_rw(dma->as, addr, buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
 }
 
 static inline int dma_memory_read_relaxed(DMAContext *dma, dma_addr_t addr,
@@ -170,43 +133,26 @@ static inline int dma_memory_write(DMAContext *dma, dma_addr_t addr,
                          DMA_DIRECTION_FROM_DEVICE);
 }
 
-int iommu_dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c,
-			 dma_addr_t len);
-
 int dma_memory_set(DMAContext *dma, dma_addr_t addr, uint8_t c, dma_addr_t len);
 
-void *iommu_dma_memory_map(DMAContext *dma,
-                           dma_addr_t addr, dma_addr_t *len,
-                           DMADirection dir);
 static inline void *dma_memory_map(DMAContext *dma,
                                    dma_addr_t addr, dma_addr_t *len,
                                    DMADirection dir)
 {
-    if (!dma_has_iommu(dma)) {
-        hwaddr xlen = *len;
-        void *p;
-
-        p = address_space_map(dma->as, addr, &xlen, dir == DMA_DIRECTION_FROM_DEVICE);
-        *len = xlen;
-        return p;
-    } else {
-        return iommu_dma_memory_map(dma, addr, len, dir);
-    }
+    hwaddr xlen = *len;
+    void *p;
+
+    p = address_space_map(dma->as, addr, &xlen, dir == DMA_DIRECTION_FROM_DEVICE);
+    *len = xlen;
+    return p;
 }
 
-void iommu_dma_memory_unmap(DMAContext *dma,
-                            void *buffer, dma_addr_t len,
-                            DMADirection dir, dma_addr_t access_len);
 static inline void dma_memory_unmap(DMAContext *dma,
                                     void *buffer, dma_addr_t len,
                                     DMADirection dir, dma_addr_t access_len)
 {
-    if (!dma_has_iommu(dma)) {
-        address_space_unmap(dma->as, buffer, (hwaddr)len,
-                            dir == DMA_DIRECTION_FROM_DEVICE, access_len);
-    } else {
-        iommu_dma_memory_unmap(dma, buffer, len, dir, access_len);
-    }
+    address_space_unmap(dma->as, buffer, (hwaddr)len,
+                        dir == DMA_DIRECTION_FROM_DEVICE, access_len);
 }
 
 #define DEFINE_LDST_DMA(_lname, _sname, _bits, _end) \
@@ -247,8 +193,7 @@ DEFINE_LDST_DMA(q, q, 64, be);
 
 #undef DEFINE_LDST_DMA
 
-void dma_context_init(DMAContext *dma, AddressSpace *as, DMATranslateFunc translate,
-                      DMAMapFunc map, DMAUnmapFunc unmap);
+void dma_context_init(DMAContext *dma, AddressSpace *as);
 
 struct ScatterGatherEntry {
     dma_addr_t base;
commit a84bb436696159d460d03db809c27a854cee0863
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Apr 11 12:35:33 2013 +0200

    spapr: use memory core for iommu support
    
    Now we can stop using a "translating" DMAContext, but we do not yet modify
    the sPAPRTCETable users to get an AddressSpace; they keep using the table
    via a DMAContext.
    
    Acked-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index cf5ccb1..6e33929 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -37,12 +37,16 @@ enum sPAPRTCEAccess {
 };
 
 struct sPAPRTCETable {
+    /* temporary until everyone has its own AddressSpace */
     DMAContext dma;
+    AddressSpace as;
+
     uint32_t liobn;
     uint32_t window_size;
     sPAPRTCE *table;
     bool bypass;
     int fd;
+    MemoryRegion iommu;
     QLIST_ENTRY(sPAPRTCETable) list;
 };
 
@@ -68,8 +72,9 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
     return NULL;
 }
 
-static IOMMUTLBEntry spapr_tce_translate_iommu(sPAPRTCETable *tcet, hwaddr addr)
+static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr)
 {
+    sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
     uint64_t tce;
 
 #ifdef DEBUG_TCE
@@ -111,24 +116,9 @@ static IOMMUTLBEntry spapr_tce_translate_iommu(sPAPRTCETable *tcet, hwaddr addr)
     };
 }
 
-static int spapr_tce_translate(DMAContext *dma,
-                               dma_addr_t addr,
-                               hwaddr *paddr,
-                               hwaddr *len,
-                               DMADirection dir)
- {
-    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
-    bool is_write = (dir == DMA_DIRECTION_FROM_DEVICE);
-    IOMMUTLBEntry entry = spapr_tce_translate_iommu(tcet, addr);
-    if (!(entry.perm & (1 << is_write))) {
-        return -EPERM;
-    }
-
-    /* Translate */
-    *paddr = entry.translated_addr | (addr & entry.addr_mask);
-    *len = (addr | entry.addr_mask) - addr + 1;
-    return 0;
-}
+static MemoryRegionIOMMUOps spapr_iommu_ops = {
+    .translate = spapr_tce_translate_iommu,
+};
 
 sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size)
 {
@@ -145,8 +135,6 @@ sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size)
     }
 
     tcet = g_malloc0(sizeof(*tcet));
-    dma_context_init(&tcet->dma, &address_space_memory, spapr_tce_translate, NULL, NULL);
-
     tcet->liobn = liobn;
     tcet->window_size = window_size;
 
@@ -167,6 +155,11 @@ sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size)
             "table @ %p, fd=%d\n", tcet, liobn, tcet->table, tcet->fd);
 #endif
 
+    memory_region_init_iommu(&tcet->iommu, &spapr_iommu_ops,
+                             "iommu-spapr", UINT64_MAX);
+    address_space_init(&tcet->as, &tcet->iommu);
+    dma_context_init(&tcet->dma, &tcet->as, NULL, NULL, NULL);
+
     QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
 
     return tcet;
@@ -190,6 +183,11 @@ DMAContext *spapr_tce_get_dma(sPAPRTCETable *tcet)
     return &tcet->dma;
 }
 
+MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet)
+{
+    return &tcet->iommu;
+}
+
 void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass)
 {
     tcet->bypass = bypass;
@@ -208,6 +206,7 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
                                 target_ulong tce)
 {
     sPAPRTCE *tcep;
+    IOMMUTLBEntry entry;
 
     if (ioba >= tcet->window_size) {
         hcall_dprintf("spapr_vio_put_tce on out-of-bounds IOBA 0x"
@@ -218,6 +217,13 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
     tcep = tcet->table + (ioba >> SPAPR_TCE_PAGE_SHIFT);
     tcep->tce = tce;
 
+    entry.target_as = &address_space_memory,
+    entry.iova = ioba & ~SPAPR_TCE_PAGE_MASK;
+    entry.translated_addr = tce & ~SPAPR_TCE_PAGE_MASK;
+    entry.addr_mask = SPAPR_TCE_PAGE_MASK;
+    entry.perm = tce;
+    memory_region_notify_iommu(&tcet->iommu, entry);
+
     return H_SUCCESS;
 }
 
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index e8d617b..142abb7 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -349,6 +349,7 @@ void spapr_events_init(sPAPREnvironment *spapr);
 void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
 sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size);
 DMAContext *spapr_tce_get_dma(sPAPRTCETable *tcet);
+MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet);
 void spapr_tce_free(sPAPRTCETable *tcet);
 void spapr_tce_reset(sPAPRTCETable *tcet);
 void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass);
commit a71bfbfe9d0bb74912170435d687f3c5de86a9f6
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Apr 16 15:05:06 2013 +0200

    spapr: make IOMMU translation go through IOMMUTLBEntry
    
    The next step is to introduce the translation code that will be used for
    IOMMU MemoryRegions, but still do the actual translation in a DMAContext.
    
    Acked-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 7a507e0..cf5ccb1 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -68,15 +68,8 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
     return NULL;
 }
 
-static int spapr_tce_translate(DMAContext *dma,
-                               dma_addr_t addr,
-                               hwaddr *paddr,
-                               hwaddr *len,
-                               DMADirection dir)
+static IOMMUTLBEntry spapr_tce_translate_iommu(sPAPRTCETable *tcet, hwaddr addr)
 {
-    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
-    enum sPAPRTCEAccess access = (dir == DMA_DIRECTION_FROM_DEVICE)
-        ? SPAPR_TCE_WO : SPAPR_TCE_RO;
     uint64_t tce;
 
 #ifdef DEBUG_TCE
@@ -85,9 +78,13 @@ static int spapr_tce_translate(DMAContext *dma,
 #endif
 
     if (tcet->bypass) {
-        *paddr = addr;
-        *len = (hwaddr)-1;
-        return 0;
+        return (IOMMUTLBEntry) {
+            .target_as = &address_space_memory,
+            .iova = 0,
+            .translated_addr = 0,
+            .addr_mask = ~(hwaddr)0,
+            .perm = IOMMU_RW,
+        };
     }
 
     /* Check if we are in bound */
@@ -95,28 +92,41 @@ static int spapr_tce_translate(DMAContext *dma,
 #ifdef DEBUG_TCE
         fprintf(stderr, "spapr_tce_translate out of bounds\n");
 #endif
-        return -EFAULT;
+        return (IOMMUTLBEntry) { .perm = IOMMU_NONE };
     }
 
     tce = tcet->table[addr >> SPAPR_TCE_PAGE_SHIFT].tce;
 
-    /* Check TCE */
-    if (!(tce & access)) {
+#ifdef DEBUG_TCE
+    fprintf(stderr, " ->  *paddr=0x%llx, *len=0x%llx\n",
+            (tce & ~SPAPR_TCE_PAGE_MASK), SPAPR_TCE_PAGE_MASK + 1);
+#endif
+
+    return (IOMMUTLBEntry) {
+        .target_as = &address_space_memory,
+        .iova = addr & ~SPAPR_TCE_PAGE_MASK,
+        .translated_addr = tce & ~SPAPR_TCE_PAGE_MASK,
+        .addr_mask = SPAPR_TCE_PAGE_MASK,
+        .perm = tce,
+    };
+}
+
+static int spapr_tce_translate(DMAContext *dma,
+                               dma_addr_t addr,
+                               hwaddr *paddr,
+                               hwaddr *len,
+                               DMADirection dir)
+ {
+    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
+    bool is_write = (dir == DMA_DIRECTION_FROM_DEVICE);
+    IOMMUTLBEntry entry = spapr_tce_translate_iommu(tcet, addr);
+    if (!(entry.perm & (1 << is_write))) {
         return -EPERM;
     }
 
-    /* How much til end of page ? */
-    *len = ((~addr) & SPAPR_TCE_PAGE_MASK) + 1;
-
     /* Translate */
-    *paddr = (tce & ~SPAPR_TCE_PAGE_MASK) |
-        (addr & SPAPR_TCE_PAGE_MASK);
-
-#ifdef DEBUG_TCE
-    fprintf(stderr, " ->  *paddr=0x" TARGET_FMT_plx ", *len=0x"
-            TARGET_FMT_plx "\n", *paddr, *len);
-#endif
-
+    *paddr = entry.translated_addr | (addr & entry.addr_mask);
+    *len = (addr | entry.addr_mask) - addr + 1;
     return 0;
 }
 
commit 2b7dc949e241ac2b069d2d6183c1346cad792662
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Apr 10 17:30:48 2013 +0200

    spapr: convert TCE API to use an opaque type
    
    The TCE table is currently returned as a DMAContext, and non-type-safe
    APIs are called later passing back the DMAContext.  Since we want to move
    away from DMAContext, use an opaque type instead, and add an accessor
    to retrieve the DMAContext from it.
    
    Acked-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index e1fe941..7a507e0 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -36,8 +36,6 @@ enum sPAPRTCEAccess {
     SPAPR_TCE_RW = 3,
 };
 
-typedef struct sPAPRTCETable sPAPRTCETable;
-
 struct sPAPRTCETable {
     DMAContext dma;
     uint32_t liobn;
@@ -122,7 +120,7 @@ static int spapr_tce_translate(DMAContext *dma,
     return 0;
 }
 
-DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size)
+sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size)
 {
     sPAPRTCETable *tcet;
 
@@ -155,43 +153,40 @@ DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size)
     }
 
 #ifdef DEBUG_TCE
-    fprintf(stderr, "spapr_iommu: New TCE table, liobn=0x%x, context @ %p, "
-            "table @ %p, fd=%d\n", liobn, &tcet->dma, tcet->table, tcet->fd);
+    fprintf(stderr, "spapr_iommu: New TCE table @ %p, liobn=0x%x, "
+            "table @ %p, fd=%d\n", tcet, liobn, tcet->table, tcet->fd);
 #endif
 
     QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
 
-    return &tcet->dma;
+    return tcet;
 }
 
-void spapr_tce_free(DMAContext *dma)
+void spapr_tce_free(sPAPRTCETable *tcet)
 {
+    QLIST_REMOVE(tcet, list);
 
-    if (dma) {
-        sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
-
-        QLIST_REMOVE(tcet, list);
-
-        if (!kvm_enabled() ||
-            (kvmppc_remove_spapr_tce(tcet->table, tcet->fd,
-                                     tcet->window_size) != 0)) {
-            g_free(tcet->table);
-        }
-
-        g_free(tcet);
+    if (!kvm_enabled() ||
+        (kvmppc_remove_spapr_tce(tcet->table, tcet->fd,
+                                 tcet->window_size) != 0)) {
+        g_free(tcet->table);
     }
+
+    g_free(tcet);
 }
 
-void spapr_tce_set_bypass(DMAContext *dma, bool bypass)
+DMAContext *spapr_tce_get_dma(sPAPRTCETable *tcet)
 {
-    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
+    return &tcet->dma;
+}
 
+void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass)
+{
     tcet->bypass = bypass;
 }
 
-void spapr_tce_reset(DMAContext *dma)
+void spapr_tce_reset(sPAPRTCETable *tcet)
 {
-    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
     size_t table_size = (tcet->window_size >> SPAPR_TCE_PAGE_SHIFT)
         * sizeof(sPAPRTCE);
 
@@ -277,17 +272,12 @@ int spapr_dma_dt(void *fdt, int node_off, const char *propname,
 }
 
 int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
-                      DMAContext *iommu)
+                      sPAPRTCETable *tcet)
 {
-    if (!iommu) {
+    if (!tcet) {
         return 0;
     }
 
-    if (iommu->translate == spapr_tce_translate) {
-        sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, iommu);
-        return spapr_dma_dt(fdt, node_off, propname,
-                tcet->liobn, 0, tcet->window_size);
-    }
-
-    return -1;
+    return spapr_dma_dt(fdt, node_off, propname,
+                        tcet->liobn, 0, tcet->window_size);
 }
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 62ff323..eb64a8f 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -511,7 +511,7 @@ static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus, void *opaque,
 {
     sPAPRPHBState *phb = opaque;
 
-    return phb->dma;
+    return spapr_tce_get_dma(phb->tcet);
 }
 
 static int spapr_phb_init(SysBusDevice *s)
@@ -646,8 +646,8 @@ static int spapr_phb_init(SysBusDevice *s)
 
     sphb->dma_window_start = 0;
     sphb->dma_window_size = 0x40000000;
-    sphb->dma = spapr_tce_new_dma_context(sphb->dma_liobn, sphb->dma_window_size);
-    if (!sphb->dma) {
+    sphb->tcet = spapr_tce_new_table(sphb->dma_liobn, sphb->dma_window_size);
+    if (!sphb->tcet) {
         fprintf(stderr, "Unable to create TCE table for %s\n", sphb->dtbusname);
         return -1;
     }
@@ -676,7 +676,7 @@ static void spapr_phb_reset(DeviceState *qdev)
     sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
 
     /* Reset the IOMMU state */
-    spapr_tce_reset(sphb->dma);
+    spapr_tce_reset(sphb->tcet);
 }
 
 static Property spapr_phb_properties[] = {
diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
index 304f316..ae906a7 100644
--- a/hw/ppc/spapr_vio.c
+++ b/hw/ppc/spapr_vio.c
@@ -142,7 +142,7 @@ static int vio_make_devnode(VIOsPAPRDevice *dev,
         }
     }
 
-    ret = spapr_tcet_dma_dt(fdt, node_off, "ibm,my-dma-window", dev->dma);
+    ret = spapr_tcet_dma_dt(fdt, node_off, "ibm,my-dma-window", dev->tcet);
     if (ret < 0) {
         return ret;
     }
@@ -315,8 +315,8 @@ int spapr_vio_send_crq(VIOsPAPRDevice *dev, uint8_t *crq)
 
 static void spapr_vio_quiesce_one(VIOsPAPRDevice *dev)
 {
-    if (dev->dma) {
-        spapr_tce_reset(dev->dma);
+    if (dev->tcet) {
+        spapr_tce_reset(dev->tcet);
     }
     free_crq(dev);
 }
@@ -341,12 +341,12 @@ static void rtas_set_tce_bypass(sPAPREnvironment *spapr, uint32_t token,
         return;
     }
 
-    if (!dev->dma) {
+    if (!dev->tcet) {
         rtas_st(rets, 0, -3);
         return;
     }
 
-    spapr_tce_set_bypass(dev->dma, !!enable);
+    spapr_tce_set_bypass(dev->tcet, !!enable);
 
     rtas_st(rets, 0, 0);
 }
@@ -453,7 +453,8 @@ static int spapr_vio_busdev_init(DeviceState *qdev)
 
     if (pc->rtce_window_size) {
         uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
-        dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
+        dev->tcet = spapr_tce_new_table(liobn, pc->rtce_window_size);
+        dev->dma = spapr_tce_get_dma(dev->tcet);
     }
 
     return pc->init(dev);
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index b21080c..653dd40 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -49,7 +49,7 @@ typedef struct sPAPRPHBState {
     uint32_t dma_liobn;
     uint64_t dma_window_start;
     uint64_t dma_window_size;
-    DMAContext *dma;
+    sPAPRTCETable *tcet;
 
     struct {
         uint32_t irq;
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 864bee9..e8d617b 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -342,17 +342,19 @@ typedef struct sPAPRTCE {
 
 #define RTAS_ERROR_LOG_MAX      2048
 
+typedef struct sPAPRTCETable sPAPRTCETable;
 
 void spapr_iommu_init(void);
 void spapr_events_init(sPAPREnvironment *spapr);
 void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
-DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size);
-void spapr_tce_free(DMAContext *dma);
-void spapr_tce_reset(DMAContext *dma);
-void spapr_tce_set_bypass(DMAContext *dma, bool bypass);
+sPAPRTCETable *spapr_tce_new_table(uint32_t liobn, size_t window_size);
+DMAContext *spapr_tce_get_dma(sPAPRTCETable *tcet);
+void spapr_tce_free(sPAPRTCETable *tcet);
+void spapr_tce_reset(sPAPRTCETable *tcet);
+void spapr_tce_set_bypass(sPAPRTCETable *tcet, bool bypass);
 int spapr_dma_dt(void *fdt, int node_off, const char *propname,
                  uint32_t liobn, uint64_t window, uint32_t size);
 int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
-                      DMAContext *dma);
+                      sPAPRTCETable *tcet);
 
 #endif /* !defined (__HW_SPAPR_H__) */
diff --git a/include/hw/ppc/spapr_vio.h b/include/hw/ppc/spapr_vio.h
index f98ec0a..56f2821 100644
--- a/include/hw/ppc/spapr_vio.h
+++ b/include/hw/ppc/spapr_vio.h
@@ -63,6 +63,7 @@ struct VIOsPAPRDevice {
     uint32_t irq;
     target_ulong signal_state;
     VIOsPAPR_CRQ crq;
+    sPAPRTCETable *tcet;
     DMAContext *dma;
 };
 
commit 06d985f5d844d07d31b4dada20f4ff6cf0d6ff4a
Author: Avi Kivity <avi.kivity at gmail.com>
Date:   Tue Oct 30 13:47:49 2012 +0200

    vfio: abort if an emulated iommu is used
    
    vfio doesn't support guest iommus yet, indicate it to the user
    by gently depositing a core on their disk.
    
    Reviewed-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Avi Kivity <avi.kivity at gmail.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index c89676b..52fb036 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -1939,6 +1939,8 @@ static void vfio_listener_region_add(MemoryListener *listener,
     void *vaddr;
     int ret;
 
+    assert(!memory_region_is_iommu(section->mr));
+
     if (vfio_listener_skipped_section(section)) {
         DPRINTF("SKIPPING region_add %"HWADDR_PRIx" - %"PRIx64"\n",
                 section->offset_within_address_space,
commit 068665757da047d7d2980c17bba0659eb0ea0a89
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Tue May 14 19:13:56 2013 +1000

    memory: Add iommu map/unmap notifiers
    
    This patch adds a NotifierList to MemoryRegions which represent IOMMUs
    allowing other parts of the code to register interest in mappings or
    unmappings from the IOMMU.  All IOMMU implementations will need to call
    memory_region_notify_iommu() to inform those waiting on the notifier list,
    whenever an IOMMU mapping is made or removed.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/include/exec/memory.h b/include/exec/memory.h
index a5e7721..75fa2b6 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -25,6 +25,7 @@
 #include "exec/iorange.h"
 #include "exec/ioport.h"
 #include "qemu/int128.h"
+#include "qemu/notify.h"
 
 #define MAX_PHYS_ADDR_SPACE_BITS 62
 #define MAX_PHYS_ADDR            (((hwaddr)1 << MAX_PHYS_ADDR_SPACE_BITS) - 1)
@@ -173,6 +174,7 @@ struct MemoryRegion {
     uint8_t dirty_log_mask;
     unsigned ioeventfd_nb;
     MemoryRegionIoeventfd *ioeventfds;
+    NotifierList iommu_notify;
 };
 
 struct MemoryRegionPortio {
@@ -424,6 +426,36 @@ static inline bool memory_region_is_romd(MemoryRegion *mr)
 bool memory_region_is_iommu(MemoryRegion *mr);
 
 /**
+ * memory_region_notify_iommu: notify a change in an IOMMU translation entry.
+ *
+ * @mr: the memory region that was changed
+ * @entry: the new entry in the IOMMU translation table.  The entry
+ *         replaces all old entries for the same virtual I/O address range.
+ *         Deleted entries have . at perm == 0.
+ */
+void memory_region_notify_iommu(MemoryRegion *mr,
+                                IOMMUTLBEntry entry);
+
+/**
+ * memory_region_register_iommu_notifier: register a notifier for changes to
+ * IOMMU translation entries.
+ *
+ * @mr: the memory region to observe
+ * @n: the notifier to be added; the notifier receives a pointer to an
+ *     #IOMMUTLBEntry as the opaque value; the pointer ceases to be
+ *     valid on exit from the notifier.
+ */
+void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n);
+
+/**
+ * memory_region_unregister_iommu_notifier: unregister a notifier for
+ * changes to IOMMU translation entries.
+ *
+ * @n: the notifier to be removed.
+ */
+void memory_region_unregister_iommu_notifier(Notifier *n);
+
+/**
  * memory_region_name: get a memory region's name
  *
  * Returns the string that was used to initialize the memory region.
diff --git a/memory.c b/memory.c
index 7eb8d46..2a94d7d 100644
--- a/memory.c
+++ b/memory.c
@@ -1072,6 +1072,7 @@ void memory_region_init_iommu(MemoryRegion *mr,
     memory_region_init(mr, name, size);
     mr->iommu_ops = ops,
     mr->terminates = true;  /* then re-forwards */
+    notifier_list_init(&mr->iommu_notify);
 }
 
 void memory_region_init_reservation(MemoryRegion *mr,
@@ -1124,6 +1125,23 @@ bool memory_region_is_iommu(MemoryRegion *mr)
     return mr->iommu_ops;
 }
 
+void memory_region_register_iommu_notifier(MemoryRegion *mr, Notifier *n)
+{
+    notifier_list_add(&mr->iommu_notify, n);
+}
+
+void memory_region_unregister_iommu_notifier(Notifier *n)
+{
+    notifier_remove(n);
+}
+
+void memory_region_notify_iommu(MemoryRegion *mr,
+                                IOMMUTLBEntry entry)
+{
+    assert(memory_region_is_iommu(mr));
+    notifier_list_notify(&mr->iommu_notify, &entry);
+}
+
 void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
 {
     uint8_t mask = 1 << client;
commit 30951157441aed950ad8ca326500b4986d431c7a
Author: Avi Kivity <avi.kivity at gmail.com>
Date:   Tue Oct 30 13:47:46 2012 +0200

    memory: iommu support
    
    Add a new memory region type that translates addresses it is given,
    then forwards them to a target address space.  This is similar to
    an alias, except that the mapping is more flexible than a linear
    translation and trucation, and also less efficient since the
    translation happens at runtime.
    
    The implementation uses an AddressSpace mapping the target region to
    avoid hierarchical dispatch all the way to the resolved region; only
    iommu regions are looked up dynamically.
    
    Reviewed-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Avi Kivity <avi.kivity at gmail.com>
    [Modified to put translation in address_space_translate; assume
     IOMMUs are not reachable from TCG. - Paolo]
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/exec.c b/exec.c
index 77eedd8..dfddd85 100644
--- a/exec.c
+++ b/exec.c
@@ -266,14 +266,45 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
                                       hwaddr *xlat, hwaddr *plen,
                                       bool is_write)
 {
-    return address_space_translate_internal(as, addr, xlat, plen, true)->mr;
+    IOMMUTLBEntry iotlb;
+    MemoryRegionSection *section;
+    MemoryRegion *mr;
+    hwaddr len = *plen;
+
+    for (;;) {
+        section = address_space_translate_internal(as, addr, &addr, plen, true);
+        mr = section->mr;
+
+        if (!mr->iommu_ops) {
+            break;
+        }
+
+        iotlb = mr->iommu_ops->translate(mr, addr);
+        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
+                | (addr & iotlb.addr_mask));
+        len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
+        if (!(iotlb.perm & (1 << is_write))) {
+            mr = &io_mem_unassigned;
+            break;
+        }
+
+        as = iotlb.target_as;
+    }
+
+    *plen = len;
+    *xlat = addr;
+    return mr;
 }
 
 MemoryRegionSection *
 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
                                   hwaddr *plen)
 {
-    return address_space_translate_internal(as, addr, xlat, plen, false);
+    MemoryRegionSection *section;
+    section = address_space_translate_internal(as, addr, xlat, plen, false);
+
+    assert(!section->mr->iommu_ops);
+    return section;
 }
 #endif
 
diff --git a/include/exec/memory.h b/include/exec/memory.h
index fddc6ad..a5e7721 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -53,6 +53,24 @@ struct MemoryRegionIORange {
     hwaddr offset;
 };
 
+typedef struct IOMMUTLBEntry IOMMUTLBEntry;
+
+/* See address_space_translate: bit 0 is read, bit 1 is write.  */
+typedef enum {
+    IOMMU_NONE = 0,
+    IOMMU_RO   = 1,
+    IOMMU_WO   = 2,
+    IOMMU_RW   = 3,
+} IOMMUAccessFlags;
+
+struct IOMMUTLBEntry {
+    AddressSpace    *target_as;
+    hwaddr           iova;
+    hwaddr           translated_addr;
+    hwaddr           addr_mask;  /* 0xfff = 4k translation */
+    IOMMUAccessFlags perm;
+};
+
 /*
  * Memory region callbacks
  */
@@ -115,12 +133,20 @@ struct MemoryRegionOps {
     const MemoryRegionMmio old_mmio;
 };
 
+typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps;
+
+struct MemoryRegionIOMMUOps {
+    /* Return a TLB entry that contains a given address. */
+    IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr);
+};
+
 typedef struct CoalescedMemoryRange CoalescedMemoryRange;
 typedef struct MemoryRegionIoeventfd MemoryRegionIoeventfd;
 
 struct MemoryRegion {
     /* All fields are private - violators will be prosecuted */
     const MemoryRegionOps *ops;
+    const MemoryRegionIOMMUOps *iommu_ops;
     void *opaque;
     MemoryRegion *parent;
     Int128 size;
@@ -332,6 +358,24 @@ void memory_region_init_rom_device(MemoryRegion *mr,
 void memory_region_init_reservation(MemoryRegion *mr,
                                     const char *name,
                                     uint64_t size);
+
+/**
+ * memory_region_init_iommu: Initialize a memory region that translates
+ * addresses
+ *
+ * An IOMMU region translates addresses and forwards accesses to a target
+ * memory region.
+ *
+ * @mr: the #MemoryRegion to be initialized
+ * @ops: a function that translates addresses into the @target region
+ * @name: used for debugging; not visible to the user or ABI
+ * @size: size of the region.
+ */
+void memory_region_init_iommu(MemoryRegion *mr,
+                              const MemoryRegionIOMMUOps *ops,
+                              const char *name,
+                              uint64_t size);
+
 /**
  * memory_region_destroy: Destroy a memory region and reclaim all resources.
  *
@@ -371,6 +415,15 @@ static inline bool memory_region_is_romd(MemoryRegion *mr)
 }
 
 /**
+ * memory_region_is_iommu: check whether a memory region is an iommu
+ *
+ * Returns %true is a memory region is an iommu.
+ *
+ * @mr: the memory region being queried
+ */
+bool memory_region_is_iommu(MemoryRegion *mr);
+
+/**
  * memory_region_name: get a memory region's name
  *
  * Returns the string that was used to initialize the memory region.
@@ -825,7 +878,8 @@ void address_space_destroy(AddressSpace *as);
 /**
  * address_space_rw: read from or write to an address space.
  *
- * Return true if the operation hit any unassigned memory.
+ * Return true if the operation hit any unassigned memory or encountered an
+ * IOMMU fault.
  *
  * @as: #AddressSpace to be accessed
  * @addr: address within that address space
@@ -838,7 +892,8 @@ bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
 /**
  * address_space_write: write to address space.
  *
- * Return true if the operation hit any unassigned memory.
+ * Return true if the operation hit any unassigned memory or encountered an
+ * IOMMU fault.
  *
  * @as: #AddressSpace to be accessed
  * @addr: address within that address space
@@ -850,7 +905,8 @@ bool address_space_write(AddressSpace *as, hwaddr addr,
 /**
  * address_space_read: read from an address space.
  *
- * Return true if the operation hit any unassigned memory.
+ * Return true if the operation hit any unassigned memory or encountered an
+ * IOMMU fault.
  *
  * @as: #AddressSpace to be accessed
  * @addr: address within that address space
@@ -875,7 +931,9 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
 /* address_space_access_valid: check for validity of accessing an address
  * space range
  *
- * Check whether memory is assigned to the given address space range.
+ * Check whether memory is assigned to the given address space range, and
+ * access is permitted by any IOMMU regions that are active for the address
+ * space.
  *
  * For now, addr and len should be aligned to a page size.  This limitation
  * will be lifted in the future.
diff --git a/memory.c b/memory.c
index 2b19745..7eb8d46 100644
--- a/memory.c
+++ b/memory.c
@@ -824,6 +824,7 @@ void memory_region_init(MemoryRegion *mr,
 {
     mr->ops = &unassigned_mem_ops;
     mr->opaque = NULL;
+    mr->iommu_ops = NULL;
     mr->parent = NULL;
     mr->size = int128_make64(size);
     if (size == UINT64_MAX) {
@@ -1063,6 +1064,16 @@ void memory_region_init_rom_device(MemoryRegion *mr,
     mr->ram_addr = qemu_ram_alloc(size, mr);
 }
 
+void memory_region_init_iommu(MemoryRegion *mr,
+                              const MemoryRegionIOMMUOps *ops,
+                              const char *name,
+                              uint64_t size)
+{
+    memory_region_init(mr, name, size);
+    mr->iommu_ops = ops,
+    mr->terminates = true;  /* then re-forwards */
+}
+
 void memory_region_init_reservation(MemoryRegion *mr,
                                     const char *name,
                                     uint64_t size)
@@ -1108,6 +1119,11 @@ bool memory_region_is_rom(MemoryRegion *mr)
     return mr->ram && mr->readonly;
 }
 
+bool memory_region_is_iommu(MemoryRegion *mr)
+{
+    return mr->iommu_ops;
+}
+
 void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
 {
     uint8_t mask = 1 << client;
commit 052e87b073cb70afcd767d32f45af2794a5a65de
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon May 27 10:08:27 2013 +0200

    memory: make section size a 128-bit integer
    
    So far, the size of all regions passed to listeners could fit in 64 bits,
    because artificial regions (containers and aliases) are eliminated by
    the memory core, leaving only device regions which have reasonable sizes
    
    An IOMMU however cannot be eliminated by the memory core, and may have
    an artificial size, hence we may need 65 bits to represent its size.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/exec.c b/exec.c
index b070b43..77eedd8 100644
--- a/exec.c
+++ b/exec.c
@@ -801,7 +801,7 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
     MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
     MemoryRegionSection subsection = {
         .offset_within_address_space = base,
-        .size = TARGET_PAGE_SIZE,
+        .size = int128_make64(TARGET_PAGE_SIZE),
     };
     hwaddr start, end;
 
@@ -816,16 +816,18 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
         subpage = container_of(existing->mr, subpage_t, iomem);
     }
     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
-    end = start + section->size - 1;
+    end = start + int128_get64(section->size) - 1;
     subpage_register(subpage, start, end, phys_section_add(section));
 }
 
 
-static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
+static void register_multipage(AddressSpaceDispatch *d,
+                               MemoryRegionSection *section)
 {
     hwaddr start_addr = section->offset_within_address_space;
     uint16_t section_index = phys_section_add(section);
-    uint64_t num_pages = section->size >> TARGET_PAGE_BITS;
+    uint64_t num_pages = int128_get64(int128_rshift(section->size,
+                                                    TARGET_PAGE_BITS));
 
     assert(num_pages);
     phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
@@ -835,28 +837,29 @@ static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
 {
     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
     MemoryRegionSection now = *section, remain = *section;
+    Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
 
     if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
         uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
                        - now.offset_within_address_space;
 
-        now.size = MIN(left, now.size);
+        now.size = int128_min(int128_make64(left), now.size);
         register_subpage(d, &now);
     } else {
-        now.size = 0;
+        now.size = int128_zero();
     }
-    while (remain.size != now.size) {
-        remain.size -= now.size;
-        remain.offset_within_address_space += now.size;
-        remain.offset_within_region += now.size;
+    while (int128_ne(remain.size, now.size)) {
+        remain.size = int128_sub(remain.size, now.size);
+        remain.offset_within_address_space += int128_get64(now.size);
+        remain.offset_within_region += int128_get64(now.size);
         now = remain;
-        if (remain.size < TARGET_PAGE_SIZE) {
+        if (int128_lt(remain.size, page_size)) {
             register_subpage(d, &now);
         } else if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
-            now.size = TARGET_PAGE_SIZE;
+            now.size = page_size;
             register_subpage(d, &now);
         } else {
-            now.size &= -TARGET_PAGE_SIZE;
+            now.size = int128_and(now.size, int128_neg(page_size));
             register_multipage(d, &now);
         }
     }
@@ -1666,7 +1669,7 @@ static uint16_t dummy_section(MemoryRegion *mr)
         .mr = mr,
         .offset_within_address_space = 0,
         .offset_within_region = 0,
-        .size = UINT64_MAX,
+        .size = int128_2_64(),
     };
 
     return phys_section_add(&section);
@@ -1735,14 +1738,16 @@ static void io_region_add(MemoryListener *listener,
     mrio->mr = section->mr;
     mrio->offset = section->offset_within_region;
     iorange_init(&mrio->iorange, &memory_region_iorange_ops,
-                 section->offset_within_address_space, section->size);
+                 section->offset_within_address_space,
+                 int128_get64(section->size));
     ioport_register(&mrio->iorange);
 }
 
 static void io_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
 {
-    isa_unassign_ioport(section->offset_within_address_space, section->size);
+    isa_unassign_ioport(section->offset_within_address_space,
+                        int128_get64(section->size));
 }
 
 static MemoryListener core_memory_listener = {
diff --git a/hw/core/loader.c b/hw/core/loader.c
index a711145..d569636 100644
--- a/hw/core/loader.c
+++ b/hw/core/loader.c
@@ -726,7 +726,7 @@ int rom_load_all(void)
         addr  = rom->addr;
         addr += rom->romsize;
         section = memory_region_find(get_system_memory(), rom->addr, 1);
-        rom->isrom = section.size && memory_region_is_rom(section.mr);
+        rom->isrom = int128_nz(section.size) && memory_region_is_rom(section.mr);
     }
     qemu_register_reset(rom_reset, NULL);
     roms_loaded = 1;
diff --git a/hw/display/exynos4210_fimd.c b/hw/display/exynos4210_fimd.c
index 6cb5016..0da00a9 100644
--- a/hw/display/exynos4210_fimd.c
+++ b/hw/display/exynos4210_fimd.c
@@ -1133,7 +1133,7 @@ static void fimd_update_memory_section(Exynos4210fimdState *s, unsigned win)
     DPRINT_TRACE("Window %u framebuffer changed: address=0x%08x, len=0x%x\n",
             win, fb_start_addr, w->fb_len);
 
-    if (w->mem_section.size != w->fb_len ||
+    if (int128_get64(w->mem_section.size) != w->fb_len ||
             !memory_region_is_ram(w->mem_section.mr)) {
         DPRINT_ERROR("Failed to find window %u framebuffer region\n", win);
         goto error_return;
@@ -1155,7 +1155,7 @@ static void fimd_update_memory_section(Exynos4210fimdState *s, unsigned win)
 
 error_return:
     w->mem_section.mr = NULL;
-    w->mem_section.size = 0;
+    w->mem_section.size = int128_zero();
     w->host_fb_addr = NULL;
     w->fb_len = 0;
 }
diff --git a/hw/display/framebuffer.c b/hw/display/framebuffer.c
index 6be31db..49c9e59 100644
--- a/hw/display/framebuffer.c
+++ b/hw/display/framebuffer.c
@@ -54,7 +54,8 @@ void framebuffer_update_display(
     src_len = src_width * rows;
 
     mem_section = memory_region_find(address_space, base, src_len);
-    if (mem_section.size != src_len || !memory_region_is_ram(mem_section.mr)) {
+    if (int128_get64(mem_section.size) != src_len ||
+            !memory_region_is_ram(mem_section.mr)) {
         return;
     }
     mem = mem_section.mr;
diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c
index 693a9ff..c89676b 100644
--- a/hw/misc/vfio.c
+++ b/hw/misc/vfio.c
@@ -1953,7 +1953,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
     }
 
     iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
-    end = (section->offset_within_address_space + section->size) &
+    end = (section->offset_within_address_space + int128_get64(section->size)) &
           TARGET_PAGE_MASK;
 
     if (iova >= end) {
@@ -1997,7 +1997,7 @@ static void vfio_listener_region_del(MemoryListener *listener,
     }
 
     iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
-    end = (section->offset_within_address_space + section->size) &
+    end = (section->offset_within_address_space + int128_get64(section->size)) &
           TARGET_PAGE_MASK;
 
     if (iova >= end) {
diff --git a/hw/virtio/dataplane/hostmem.c b/hw/virtio/dataplane/hostmem.c
index 37292ff..7e46723 100644
--- a/hw/virtio/dataplane/hostmem.c
+++ b/hw/virtio/dataplane/hostmem.c
@@ -90,7 +90,7 @@ static void hostmem_append_new_region(HostMem *hostmem,
     hostmem->new_regions[num] = (HostMemRegion){
         .host_addr = ram_ptr + section->offset_within_region,
         .guest_addr = section->offset_within_address_space,
-        .size = section->size,
+        .size = int128_get64(section->size),
         .readonly = section->readonly,
     };
     hostmem->num_new_regions++;
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index fbabf99..baf84ea 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -81,7 +81,7 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
         return 0;
     }
     start_addr = section->offset_within_address_space;
-    end_addr = range_get_last(start_addr, section->size);
+    end_addr = range_get_last(start_addr, int128_get64(section->size));
     start_addr = MAX(first, start_addr);
     end_addr = MIN(last, end_addr);
 
@@ -379,7 +379,7 @@ static void vhost_set_memory(MemoryListener *listener,
     struct vhost_dev *dev = container_of(listener, struct vhost_dev,
                                          memory_listener);
     hwaddr start_addr = section->offset_within_address_space;
-    ram_addr_t size = section->size;
+    ram_addr_t size = int128_get64(section->size);
     bool log_dirty = memory_region_is_logging(section->mr);
     int s = offsetof(struct vhost_memory, regions) +
         (dev->mem->nregions + 1) * sizeof dev->mem->regions[0];
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index d669756..a27051c 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -197,7 +197,7 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq)
 
             /* FIXME: remove get_system_memory(), but how? */
             section = memory_region_find(get_system_memory(), pa, 1);
-            if (!section.size || !memory_region_is_ram(section.mr))
+            if (!int128_nz(section.size) || !memory_region_is_ram(section.mr))
                 continue;
 
             /* Using memory_region_get_ram_ptr is bending the rules a bit, but
diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index c199818..c31a28a 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -547,7 +547,7 @@ static void xen_pt_region_update(XenPCIPassthroughState *s,
     struct CheckBarArgs args = {
         .s = s,
         .addr = sec->offset_within_address_space,
-        .size = sec->size,
+        .size = int128_get64(sec->size),
         .rc = false,
     };
 
@@ -576,7 +576,7 @@ static void xen_pt_region_update(XenPCIPassthroughState *s,
     if (d->io_regions[bar].type & PCI_BASE_ADDRESS_SPACE_IO) {
         uint32_t guest_port = sec->offset_within_address_space;
         uint32_t machine_port = s->bases[bar].access.pio_base;
-        uint32_t size = sec->size;
+        uint32_t size = int128_get64(sec->size);
         rc = xc_domain_ioport_mapping(xen_xc, xen_domid,
                                       guest_port, machine_port, size,
                                       op);
@@ -588,7 +588,7 @@ static void xen_pt_region_update(XenPCIPassthroughState *s,
         pcibus_t guest_addr = sec->offset_within_address_space;
         pcibus_t machine_addr = s->bases[bar].access.maddr
             + sec->offset_within_region;
-        pcibus_t size = sec->size;
+        pcibus_t size = int128_get64(sec->size);
         rc = xc_domain_memory_mapping(xen_xc, xen_domid,
                                       XEN_PFN(guest_addr + XC_PAGE_SIZE - 1),
                                       XEN_PFN(machine_addr + XC_PAGE_SIZE - 1),
diff --git a/include/exec/memory.h b/include/exec/memory.h
index c11a3f8..fddc6ad 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -26,6 +26,9 @@
 #include "exec/ioport.h"
 #include "qemu/int128.h"
 
+#define MAX_PHYS_ADDR_SPACE_BITS 62
+#define MAX_PHYS_ADDR            (((hwaddr)1 << MAX_PHYS_ADDR_SPACE_BITS) - 1)
+
 typedef struct MemoryRegionOps MemoryRegionOps;
 typedef struct MemoryRegionPortio MemoryRegionPortio;
 typedef struct MemoryRegionMmio MemoryRegionMmio;
@@ -185,7 +188,7 @@ struct MemoryRegionSection {
     MemoryRegion *mr;
     AddressSpace *address_space;
     hwaddr offset_within_region;
-    uint64_t size;
+    Int128 size;
     hwaddr offset_within_address_space;
     bool readonly;
 };
diff --git a/include/qemu/int128.h b/include/qemu/int128.h
index b3864b6..bfe7678 100644
--- a/include/qemu/int128.h
+++ b/include/qemu/int128.h
@@ -34,6 +34,25 @@ static inline Int128 int128_2_64(void)
     return (Int128) { 0, 1 };
 }
 
+static inline Int128 int128_and(Int128 a, Int128 b)
+{
+    return (Int128) { a.lo & b.lo, a.hi & b.hi };
+}
+
+static inline Int128 int128_rshift(Int128 a, int n)
+{
+    int64_t h;
+    if (!n) {
+        return a;
+    }
+    h = a.hi >> (n & 63);
+    if (n >= 64) {
+        return (Int128) { h, h >> 63 };
+    } else {
+        return (Int128) { (a.lo >> n) | (a.hi << (64 - n)), h };
+    }
+}
+
 static inline Int128 int128_add(Int128 a, Int128 b)
 {
     Int128 r = { a.lo + b.lo, a.hi + b.hi };
diff --git a/kvm-all.c b/kvm-all.c
index 405480e..e6b262f 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -329,7 +329,7 @@ static void kvm_log_start(MemoryListener *listener,
     int r;
 
     r = kvm_dirty_pages_log_change(section->offset_within_address_space,
-                                   section->size, true);
+                                   int128_get64(section->size), true);
     if (r < 0) {
         abort();
     }
@@ -341,7 +341,7 @@ static void kvm_log_stop(MemoryListener *listener,
     int r;
 
     r = kvm_dirty_pages_log_change(section->offset_within_address_space,
-                                   section->size, false);
+                                   int128_get64(section->size), false);
     if (r < 0) {
         abort();
     }
@@ -379,7 +379,8 @@ static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section,
     unsigned int i, j;
     unsigned long page_number, c;
     hwaddr addr, addr1;
-    unsigned int len = ((section->size / getpagesize()) + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
+    unsigned int pages = int128_get64(section->size) / getpagesize();
+    unsigned int len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
     unsigned long hpratio = getpagesize() / TARGET_PAGE_SIZE;
 
     /*
@@ -422,7 +423,7 @@ static int kvm_physical_sync_dirty_bitmap(MemoryRegionSection *section)
     KVMSlot *mem;
     int ret = 0;
     hwaddr start_addr = section->offset_within_address_space;
-    hwaddr end_addr = start_addr + section->size;
+    hwaddr end_addr = start_addr + int128_get64(section->size);
 
     d.dirty_bitmap = NULL;
     while (start_addr < end_addr) {
@@ -634,7 +635,7 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
     bool writeable = !mr->readonly && !mr->rom_device;
     bool readonly_flag = mr->readonly || memory_region_is_romd(mr);
     hwaddr start_addr = section->offset_within_address_space;
-    ram_addr_t size = section->size;
+    ram_addr_t size = int128_get64(section->size);
     void *ram = NULL;
     unsigned delta;
 
@@ -832,7 +833,8 @@ static void kvm_mem_ioeventfd_add(MemoryListener *listener,
     int r;
 
     r = kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space,
-                               data, true, section->size, match_data);
+                               data, true, int128_get64(section->size),
+                               match_data);
     if (r < 0) {
         abort();
     }
@@ -847,7 +849,8 @@ static void kvm_mem_ioeventfd_del(MemoryListener *listener,
     int r;
 
     r = kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space,
-                               data, false, section->size, match_data);
+                               data, false, int128_get64(section->size),
+                               match_data);
     if (r < 0) {
         abort();
     }
@@ -862,7 +865,8 @@ static void kvm_io_ioeventfd_add(MemoryListener *listener,
     int r;
 
     r = kvm_set_ioeventfd_pio(fd, section->offset_within_address_space,
-                              data, true, section->size, match_data);
+                              data, true, int128_get64(section->size),
+                              match_data);
     if (r < 0) {
         abort();
     }
@@ -878,7 +882,8 @@ static void kvm_io_ioeventfd_del(MemoryListener *listener,
     int r;
 
     r = kvm_set_ioeventfd_pio(fd, section->offset_within_address_space,
-                              data, false, section->size, match_data);
+                              data, false, int128_get64(section->size),
+                              match_data);
     if (r < 0) {
         abort();
     }
diff --git a/memory.c b/memory.c
index 5cb8f4a..2b19745 100644
--- a/memory.c
+++ b/memory.c
@@ -152,7 +152,7 @@ static bool memory_listener_match(MemoryListener *listener,
         .mr = (fr)->mr,                                                 \
         .address_space = (as),                                          \
         .offset_within_region = (fr)->offset_in_region,                 \
-        .size = int128_get64((fr)->addr.size),                          \
+        .size = (fr)->addr.size,                                        \
         .offset_within_address_space = int128_get64((fr)->addr.start),  \
         .readonly = (fr)->readonly,                                     \
               }))
@@ -634,7 +634,7 @@ static void address_space_add_del_ioeventfds(AddressSpace *as,
             section = (MemoryRegionSection) {
                 .address_space = as,
                 .offset_within_address_space = int128_get64(fd->addr.start),
-                .size = int128_get64(fd->addr.size),
+                .size = fd->addr.size,
             };
             MEMORY_LISTENER_CALL(eventfd_del, Forward, &section,
                                  fd->match_data, fd->data, fd->e);
@@ -647,7 +647,7 @@ static void address_space_add_del_ioeventfds(AddressSpace *as,
             section = (MemoryRegionSection) {
                 .address_space = as,
                 .offset_within_address_space = int128_get64(fd->addr.start),
-                .size = int128_get64(fd->addr.size),
+                .size = fd->addr.size,
             };
             MEMORY_LISTENER_CALL(eventfd_add, Reverse, &section,
                                  fd->match_data, fd->data, fd->e);
@@ -1215,7 +1215,7 @@ static void memory_region_update_coalesced_range_as(MemoryRegion *mr, AddressSpa
             section = (MemoryRegionSection) {
                 .address_space = as,
                 .offset_within_address_space = int128_get64(fr->addr.start),
-                .size = int128_get64(fr->addr.size),
+                .size = fr->addr.size,
             };
 
             MEMORY_LISTENER_CALL(coalesced_mmio_del, Reverse, &section,
@@ -1506,7 +1506,7 @@ static FlatRange *address_space_lookup(AddressSpace *as, AddrRange addr)
 MemoryRegionSection memory_region_find(MemoryRegion *mr,
                                        hwaddr addr, uint64_t size)
 {
-    MemoryRegionSection ret = { .mr = NULL, .size = 0 };
+    MemoryRegionSection ret = { .mr = NULL };
     MemoryRegion *root;
     AddressSpace *as;
     AddrRange range;
@@ -1536,7 +1536,7 @@ MemoryRegionSection memory_region_find(MemoryRegion *mr,
     ret.offset_within_region = fr->offset_in_region;
     ret.offset_within_region += int128_get64(int128_sub(range.start,
                                                         fr->addr.start));
-    ret.size = int128_get64(range.size);
+    ret.size = range.size;
     ret.offset_within_address_space = int128_get64(range.start);
     ret.readonly = fr->readonly;
     return ret;
@@ -1584,7 +1584,7 @@ static void listener_add_address_space(MemoryListener *listener,
             .mr = fr->mr,
             .address_space = as,
             .offset_within_region = fr->offset_in_region,
-            .size = int128_get64(fr->addr.size),
+            .size = fr->addr.size,
             .offset_within_address_space = int128_get64(fr->addr.start),
             .readonly = fr->readonly,
         };
@@ -1712,7 +1712,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
                    "-" TARGET_FMT_plx "\n",
                    base + mr->addr,
                    base + mr->addr
-                   + (hwaddr)int128_get64(mr->size) - 1,
+                   + (hwaddr)int128_get64(int128_sub(mr->size, int128_make64(1))),
                    mr->priority,
                    mr->romd_mode ? 'R' : '-',
                    !mr->readonly && !(mr->rom_device && mr->romd_mode) ? 'W'
@@ -1727,7 +1727,7 @@ static void mtree_print_mr(fprintf_function mon_printf, void *f,
                    TARGET_FMT_plx "-" TARGET_FMT_plx " (prio %d, %c%c): %s\n",
                    base + mr->addr,
                    base + mr->addr
-                   + (hwaddr)int128_get64(mr->size) - 1,
+                   + (hwaddr)int128_get64(int128_sub(mr->size, int128_make64(1))),
                    mr->priority,
                    mr->romd_mode ? 'R' : '-',
                    !mr->readonly && !(mr->rom_device && mr->romd_mode) ? 'W'
diff --git a/target-sparc/mmu_helper.c b/target-sparc/mmu_helper.c
index a9649ae..3983c96 100644
--- a/target-sparc/mmu_helper.c
+++ b/target-sparc/mmu_helper.c
@@ -845,7 +845,7 @@ hwaddr cpu_get_phys_page_debug(CPUSPARCState *env, target_ulong addr)
         }
     }
     section = memory_region_find(get_system_memory(), phys_addr, 1);
-    if (!section.size) {
+    if (!int128_nz(section.size)) {
         return -1;
     }
     return phys_addr;
diff --git a/xen-all.c b/xen-all.c
index 1a1d7bb..9b57350 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -418,7 +418,7 @@ static void xen_set_memory(struct MemoryListener *listener,
 {
     XenIOState *state = container_of(listener, XenIOState, memory_listener);
     hwaddr start_addr = section->offset_within_address_space;
-    ram_addr_t size = section->size;
+    ram_addr_t size = int128_get64(section->size);
     bool log_dirty = memory_region_is_logging(section->mr);
     hvmmem_type_t mem_type;
 
@@ -522,7 +522,7 @@ static void xen_log_start(MemoryListener *listener,
     XenIOState *state = container_of(listener, XenIOState, memory_listener);
 
     xen_sync_dirty_bitmap(state, section->offset_within_address_space,
-                          section->size);
+                          int128_get64(section->size));
 }
 
 static void xen_log_stop(MemoryListener *listener, MemoryRegionSection *section)
@@ -539,7 +539,7 @@ static void xen_log_sync(MemoryListener *listener, MemoryRegionSection *section)
     XenIOState *state = container_of(listener, XenIOState, memory_listener);
 
     xen_sync_dirty_bitmap(state, section->offset_within_address_space,
-                          section->size);
+                          int128_get64(section->size));
 }
 
 static void xen_log_global_start(MemoryListener *listener)
commit 733d5ef52721a836b1d9b5cd0f15a41db88829d0
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon May 27 10:47:10 2013 +0200

    exec: reorganize mem_add to match Int128 version
    
    When adding support for 2^64-byte sections, we will have to change
    the structure of mem_add to avoid failures in int128_get64.
    Reorganize the code now before introducing Int128.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/exec.c b/exec.c
index 0448224..b070b43 100644
--- a/exec.c
+++ b/exec.c
@@ -824,15 +824,11 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
 {
     hwaddr start_addr = section->offset_within_address_space;
-    ram_addr_t size = section->size;
-    hwaddr addr;
     uint16_t section_index = phys_section_add(section);
+    uint64_t num_pages = section->size >> TARGET_PAGE_BITS;
 
-    assert(size);
-
-    addr = start_addr;
-    phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
-                  section_index);
+    assert(num_pages);
+    phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
 }
 
 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
@@ -840,32 +836,29 @@ static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
     MemoryRegionSection now = *section, remain = *section;
 
-    if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
-        || (now.size < TARGET_PAGE_SIZE)) {
-        now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
-                       - now.offset_within_address_space,
-                       now.size);
+    if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
+        uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
+                       - now.offset_within_address_space;
+
+        now.size = MIN(left, now.size);
         register_subpage(d, &now);
+    } else {
+        now.size = 0;
+    }
+    while (remain.size != now.size) {
         remain.size -= now.size;
         remain.offset_within_address_space += now.size;
         remain.offset_within_region += now.size;
-    }
-    while (remain.size >= TARGET_PAGE_SIZE) {
         now = remain;
-        if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
+        if (remain.size < TARGET_PAGE_SIZE) {
+            register_subpage(d, &now);
+        } else if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
             now.size = TARGET_PAGE_SIZE;
             register_subpage(d, &now);
         } else {
-            now.size &= TARGET_PAGE_MASK;
+            now.size &= -TARGET_PAGE_SIZE;
             register_multipage(d, &now);
         }
-        remain.size -= now.size;
-        remain.offset_within_address_space += now.size;
-        remain.offset_within_region += now.size;
-    }
-    now = remain;
-    if (now.size) {
-        register_subpage(d, &now);
     }
 }
 
commit 5b23fd03324096056cf1f6bcf111224760d4c67c
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon May 27 13:18:06 2013 +0200

    Revert "s390x: reduce TARGET_PHYS_ADDR_SPACE_BITS to 62"
    
    This reverts commit 311f83ca08c011b048c063c2fd3038a8957970bc.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index 6304c4d..0ce82cf 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -34,10 +34,7 @@
 #include "exec/cpu-defs.h"
 #define TARGET_PAGE_BITS 12
 
-/* Actually 64-bits, limited by the memory API to 62 bits.  We
- * never use that much.
- */
-#define TARGET_PHYS_ADDR_SPACE_BITS 62
+#define TARGET_PHYS_ADDR_SPACE_BITS 64
 #define TARGET_VIRT_ADDR_SPACE_BITS 64
 
 #include "exec/cpu-all.h"
commit 99b9cc0679585b2d495d7d31ce556549b6b2721c
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon May 27 13:18:01 2013 +0200

    Revert "memory: limit sections in the radix tree to the actual address space size"
    
    This reverts commit 86a8623692b1b559a419a92eb8b6897c221bca74.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/exec.c b/exec.c
index 3a8ef42..0448224 100644
--- a/exec.c
+++ b/exec.c
@@ -835,21 +835,10 @@ static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *sec
                   section_index);
 }
 
-QEMU_BUILD_BUG_ON(TARGET_PHYS_ADDR_SPACE_BITS > MAX_PHYS_ADDR_SPACE_BITS)
-
-static MemoryRegionSection limit(MemoryRegionSection section)
-{
-    section.size = MIN(section.offset_within_address_space + section.size,
-                       MAX_PHYS_ADDR + 1)
-                   - section.offset_within_address_space;
-
-    return section;
-}
-
 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
 {
     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
-    MemoryRegionSection now = limit(*section), remain = limit(*section);
+    MemoryRegionSection now = *section, remain = *section;
 
     if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
         || (now.size < TARGET_PAGE_SIZE)) {
diff --git a/include/exec/memory.h b/include/exec/memory.h
index c747f67..c11a3f8 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -26,9 +26,6 @@
 #include "exec/ioport.h"
 #include "qemu/int128.h"
 
-#define MAX_PHYS_ADDR_SPACE_BITS 62
-#define MAX_PHYS_ADDR            (((hwaddr)1 << MAX_PHYS_ADDR_SPACE_BITS) - 1)
-
 typedef struct MemoryRegionOps MemoryRegionOps;
 typedef struct MemoryRegionPortio MemoryRegionPortio;
 typedef struct MemoryRegionMmio MemoryRegionMmio;
commit 5c8a00ce186b9a58d99b0afff90f87d5760bb44b
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed May 29 12:42:00 2013 +0200

    exec: return MemoryRegion from address_space_translate
    
    Only address_space_translate_for_iotlb needs to return the section.
    Every caller of address_space_translate now uses only section->mr,
    return it directly.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/exec.c b/exec.c
index ffd2dc8..3a8ef42 100644
--- a/exec.c
+++ b/exec.c
@@ -262,11 +262,11 @@ address_space_translate_internal(AddressSpace *as, hwaddr addr, hwaddr *xlat,
     return section;
 }
 
-MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
-                                             hwaddr *xlat, hwaddr *plen,
-                                             bool is_write)
+MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
+                                      hwaddr *xlat, hwaddr *plen,
+                                      bool is_write)
 {
-    return address_space_translate_internal(as, addr, xlat, plen, true);
+    return address_space_translate_internal(as, addr, xlat, plen, true)->mr;
 }
 
 MemoryRegionSection *
@@ -1923,58 +1923,58 @@ bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
     uint8_t *ptr;
     uint64_t val;
     hwaddr addr1;
-    MemoryRegionSection *section;
+    MemoryRegion *mr;
     bool error = false;
 
     while (len > 0) {
         l = len;
-        section = address_space_translate(as, addr, &addr1, &l, is_write);
+        mr = address_space_translate(as, addr, &addr1, &l, is_write);
 
         if (is_write) {
-            if (!memory_access_is_direct(section->mr, is_write)) {
-                l = memory_access_size(section->mr, l, addr1);
+            if (!memory_access_is_direct(mr, is_write)) {
+                l = memory_access_size(mr, l, addr1);
                 /* XXX: could force cpu_single_env to NULL to avoid
                    potential bugs */
                 if (l == 4) {
                     /* 32 bit write access */
                     val = ldl_p(buf);
-                    error |= io_mem_write(section->mr, addr1, val, 4);
+                    error |= io_mem_write(mr, addr1, val, 4);
                 } else if (l == 2) {
                     /* 16 bit write access */
                     val = lduw_p(buf);
-                    error |= io_mem_write(section->mr, addr1, val, 2);
+                    error |= io_mem_write(mr, addr1, val, 2);
                 } else {
                     /* 8 bit write access */
                     val = ldub_p(buf);
-                    error |= io_mem_write(section->mr, addr1, val, 1);
+                    error |= io_mem_write(mr, addr1, val, 1);
                 }
             } else {
-                addr1 += memory_region_get_ram_addr(section->mr);
+                addr1 += memory_region_get_ram_addr(mr);
                 /* RAM case */
                 ptr = qemu_get_ram_ptr(addr1);
                 memcpy(ptr, buf, l);
                 invalidate_and_set_dirty(addr1, l);
             }
         } else {
-            if (!memory_access_is_direct(section->mr, is_write)) {
+            if (!memory_access_is_direct(mr, is_write)) {
                 /* I/O case */
-                l = memory_access_size(section->mr, l, addr1);
+                l = memory_access_size(mr, l, addr1);
                 if (l == 4) {
                     /* 32 bit read access */
-                    error |= io_mem_read(section->mr, addr1, &val, 4);
+                    error |= io_mem_read(mr, addr1, &val, 4);
                     stl_p(buf, val);
                 } else if (l == 2) {
                     /* 16 bit read access */
-                    error |= io_mem_read(section->mr, addr1, &val, 2);
+                    error |= io_mem_read(mr, addr1, &val, 2);
                     stw_p(buf, val);
                 } else {
                     /* 8 bit read access */
-                    error |= io_mem_read(section->mr, addr1, &val, 1);
+                    error |= io_mem_read(mr, addr1, &val, 1);
                     stb_p(buf, val);
                 }
             } else {
                 /* RAM case */
-                ptr = qemu_get_ram_ptr(section->mr->ram_addr + addr1);
+                ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
                 memcpy(buf, ptr, l);
             }
         }
@@ -2011,18 +2011,18 @@ void cpu_physical_memory_write_rom(hwaddr addr,
     hwaddr l;
     uint8_t *ptr;
     hwaddr addr1;
-    MemoryRegionSection *section;
+    MemoryRegion *mr;
 
     while (len > 0) {
         l = len;
-        section = address_space_translate(&address_space_memory,
-                                          addr, &addr1, &l, true);
+        mr = address_space_translate(&address_space_memory,
+                                     addr, &addr1, &l, true);
 
-        if (!(memory_region_is_ram(section->mr) ||
-              memory_region_is_romd(section->mr))) {
+        if (!(memory_region_is_ram(mr) ||
+              memory_region_is_romd(mr))) {
             /* do nothing */
         } else {
-            addr1 += memory_region_get_ram_addr(section->mr);
+            addr1 += memory_region_get_ram_addr(mr);
             /* ROM/RAM case */
             ptr = qemu_get_ram_ptr(addr1);
             memcpy(ptr, buf, l);
@@ -2082,15 +2082,15 @@ static void cpu_notify_map_clients(void)
 
 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
 {
-    MemoryRegionSection *section;
+    MemoryRegion *mr;
     hwaddr l, xlat;
 
     while (len > 0) {
         l = len;
-        section = address_space_translate(as, addr, &xlat, &l, is_write);
-        if (!memory_access_is_direct(section->mr, is_write)) {
-            l = memory_access_size(section->mr, l, addr);
-            if (!memory_region_access_valid(section->mr, xlat, l, is_write)) {
+        mr = address_space_translate(as, addr, &xlat, &l, is_write);
+        if (!memory_access_is_direct(mr, is_write)) {
+            l = memory_access_size(mr, l, addr);
+            if (!memory_region_access_valid(mr, xlat, l, is_write)) {
                 return false;
             }
         }
@@ -2116,16 +2116,16 @@ void *address_space_map(AddressSpace *as,
     hwaddr len = *plen;
     hwaddr todo = 0;
     hwaddr l, xlat;
-    MemoryRegionSection *section;
+    MemoryRegion *mr;
     ram_addr_t raddr = RAM_ADDR_MAX;
     ram_addr_t rlen;
     void *ret;
 
     while (len > 0) {
         l = len;
-        section = address_space_translate(as, addr, &xlat, &l, is_write);
+        mr = address_space_translate(as, addr, &xlat, &l, is_write);
 
-        if (!memory_access_is_direct(section->mr, is_write)) {
+        if (!memory_access_is_direct(mr, is_write)) {
             if (todo || bounce.buffer) {
                 break;
             }
@@ -2140,9 +2140,9 @@ void *address_space_map(AddressSpace *as,
             return bounce.buffer;
         }
         if (!todo) {
-            raddr = memory_region_get_ram_addr(section->mr) + xlat;
+            raddr = memory_region_get_ram_addr(mr) + xlat;
         } else {
-            if (memory_region_get_ram_addr(section->mr) + xlat != raddr + todo) {
+            if (memory_region_get_ram_addr(mr) + xlat != raddr + todo) {
                 break;
             }
         }
@@ -2209,15 +2209,15 @@ static inline uint32_t ldl_phys_internal(hwaddr addr,
 {
     uint8_t *ptr;
     uint64_t val;
-    MemoryRegionSection *section;
+    MemoryRegion *mr;
     hwaddr l = 4;
     hwaddr addr1;
 
-    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
-                                      false);
-    if (l < 4 || !memory_access_is_direct(section->mr, false)) {
+    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
+                                 false);
+    if (l < 4 || !memory_access_is_direct(mr, false)) {
         /* I/O case */
-        io_mem_read(section->mr, addr1, &val, 4);
+        io_mem_read(mr, addr1, &val, 4);
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
             val = bswap32(val);
@@ -2229,7 +2229,7 @@ static inline uint32_t ldl_phys_internal(hwaddr addr,
 #endif
     } else {
         /* RAM case */
-        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
+        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                 & TARGET_PAGE_MASK)
                                + addr1);
         switch (endian) {
@@ -2268,15 +2268,15 @@ static inline uint64_t ldq_phys_internal(hwaddr addr,
 {
     uint8_t *ptr;
     uint64_t val;
-    MemoryRegionSection *section;
+    MemoryRegion *mr;
     hwaddr l = 8;
     hwaddr addr1;
 
-    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
-                                      false);
-    if (l < 8 || !memory_access_is_direct(section->mr, false)) {
+    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
+                                 false);
+    if (l < 8 || !memory_access_is_direct(mr, false)) {
         /* I/O case */
-        io_mem_read(section->mr, addr1, &val, 8);
+        io_mem_read(mr, addr1, &val, 8);
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
             val = bswap64(val);
@@ -2288,7 +2288,7 @@ static inline uint64_t ldq_phys_internal(hwaddr addr,
 #endif
     } else {
         /* RAM case */
-        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
+        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                 & TARGET_PAGE_MASK)
                                + addr1);
         switch (endian) {
@@ -2335,15 +2335,15 @@ static inline uint32_t lduw_phys_internal(hwaddr addr,
 {
     uint8_t *ptr;
     uint64_t val;
-    MemoryRegionSection *section;
+    MemoryRegion *mr;
     hwaddr l = 2;
     hwaddr addr1;
 
-    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
-                                      false);
-    if (l < 2 || !memory_access_is_direct(section->mr, false)) {
+    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
+                                 false);
+    if (l < 2 || !memory_access_is_direct(mr, false)) {
         /* I/O case */
-        io_mem_read(section->mr, addr1, &val, 2);
+        io_mem_read(mr, addr1, &val, 2);
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
             val = bswap16(val);
@@ -2355,7 +2355,7 @@ static inline uint32_t lduw_phys_internal(hwaddr addr,
 #endif
     } else {
         /* RAM case */
-        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
+        ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
                                 & TARGET_PAGE_MASK)
                                + addr1);
         switch (endian) {
@@ -2394,16 +2394,16 @@ uint32_t lduw_be_phys(hwaddr addr)
 void stl_phys_notdirty(hwaddr addr, uint32_t val)
 {
     uint8_t *ptr;
-    MemoryRegionSection *section;
+    MemoryRegion *mr;
     hwaddr l = 4;
     hwaddr addr1;
 
-    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
-                                      true);
-    if (l < 4 || !memory_access_is_direct(section->mr, true)) {
-        io_mem_write(section->mr, addr1, val, 4);
+    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
+                                 true);
+    if (l < 4 || !memory_access_is_direct(mr, true)) {
+        io_mem_write(mr, addr1, val, 4);
     } else {
-        addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
+        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
         ptr = qemu_get_ram_ptr(addr1);
         stl_p(ptr, val);
 
@@ -2424,13 +2424,13 @@ static inline void stl_phys_internal(hwaddr addr, uint32_t val,
                                      enum device_endian endian)
 {
     uint8_t *ptr;
-    MemoryRegionSection *section;
+    MemoryRegion *mr;
     hwaddr l = 4;
     hwaddr addr1;
 
-    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
-                                      true);
-    if (l < 4 || !memory_access_is_direct(section->mr, true)) {
+    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
+                                 true);
+    if (l < 4 || !memory_access_is_direct(mr, true)) {
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
             val = bswap32(val);
@@ -2440,10 +2440,10 @@ static inline void stl_phys_internal(hwaddr addr, uint32_t val,
             val = bswap32(val);
         }
 #endif
-        io_mem_write(section->mr, addr1, val, 4);
+        io_mem_write(mr, addr1, val, 4);
     } else {
         /* RAM case */
-        addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
+        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
         ptr = qemu_get_ram_ptr(addr1);
         switch (endian) {
         case DEVICE_LITTLE_ENDIAN:
@@ -2487,13 +2487,13 @@ static inline void stw_phys_internal(hwaddr addr, uint32_t val,
                                      enum device_endian endian)
 {
     uint8_t *ptr;
-    MemoryRegionSection *section;
+    MemoryRegion *mr;
     hwaddr l = 2;
     hwaddr addr1;
 
-    section = address_space_translate(&address_space_memory, addr, &addr1, &l,
-                                      true);
-    if (l < 2 || !memory_access_is_direct(section->mr, true)) {
+    mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
+                                 true);
+    if (l < 2 || !memory_access_is_direct(mr, true)) {
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
             val = bswap16(val);
@@ -2503,10 +2503,10 @@ static inline void stw_phys_internal(hwaddr addr, uint32_t val,
             val = bswap16(val);
         }
 #endif
-        io_mem_write(section->mr, addr1, val, 2);
+        io_mem_write(mr, addr1, val, 2);
     } else {
         /* RAM case */
-        addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK;
+        addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
         ptr = qemu_get_ram_ptr(addr1);
         switch (endian) {
         case DEVICE_LITTLE_ENDIAN:
@@ -2608,13 +2608,13 @@ bool virtio_is_big_endian(void)
 #ifndef CONFIG_USER_ONLY
 bool cpu_physical_memory_is_io(hwaddr phys_addr)
 {
-    MemoryRegionSection *section;
+    MemoryRegion*mr;
     hwaddr l = 1;
 
-    section = address_space_translate(&address_space_memory,
-                                      phys_addr, &phys_addr, &l, false);
+    mr = address_space_translate(&address_space_memory,
+                                 phys_addr, &phys_addr, &l, false);
 
-    return !(memory_region_is_ram(section->mr) ||
-             memory_region_is_romd(section->mr));
+    return !(memory_region_is_ram(mr) ||
+             memory_region_is_romd(mr));
 }
 #endif
diff --git a/include/exec/memory.h b/include/exec/memory.h
index d53a6a1..c747f67 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -859,7 +859,7 @@ bool address_space_write(AddressSpace *as, hwaddr addr,
 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len);
 
 /* address_space_translate: translate an address range into an address space
- * into a MemoryRegionSection and an address range into that section
+ * into a MemoryRegion and an address range into that section
  *
  * @as: #AddressSpace to be accessed
  * @addr: address within that address space
@@ -868,9 +868,9 @@ bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len);
  * @len: pointer to length
  * @is_write: indicates the transfer direction
  */
-MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
-                                             hwaddr *xlat, hwaddr *len,
-                                             bool is_write);
+MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
+                                      hwaddr *xlat, hwaddr *len,
+                                      bool is_write);
 
 /* address_space_access_valid: check for validity of accessing an address
  * space range
diff --git a/translate-all.c b/translate-all.c
index df7c697..9acb2b1 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -1355,15 +1355,15 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
 void tb_invalidate_phys_addr(hwaddr addr)
 {
     ram_addr_t ram_addr;
-    MemoryRegionSection *section;
+    MemoryRegion *mr;
     hwaddr l = 1;
 
-    section = address_space_translate(&address_space_memory, addr, &addr, &l, false);
-    if (!(memory_region_is_ram(section->mr)
-          || memory_region_is_romd(section->mr))) {
+    mr = address_space_translate(&address_space_memory, addr, &addr, &l, false);
+    if (!(memory_region_is_ram(mr)
+          || memory_region_is_romd(mr))) {
         return;
     }
-    ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
+    ram_addr = (memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK)
         + addr;
     tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
 }
commit acc9d80b26e8cb4667b5b336963d91233b6a98c4
Author: Jan Kiszka <jan.kiszka at siemens.com>
Date:   Sun May 26 21:55:37 2013 +0200

    exec: Implement subpage_read/write via address_space_rw
    
    This will allow to add support for unaligned memory regions: the subpage
    container region can activate unaligned support unconditionally because
    the read/write handler will now ensure that accesses are split as
    required by calling address_space_rw. We can furthermore drop the
    special handling of RAM subpages, address_space_rw takes care of this
    already.
    
    Signed-off-by: Jan Kiszka <jan.kiszka at siemens.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/exec.c b/exec.c
index a59abc7..ffd2dc8 100644
--- a/exec.c
+++ b/exec.c
@@ -66,7 +66,7 @@ AddressSpace address_space_memory;
 DMAContext dma_context_memory;
 
 MemoryRegion io_mem_rom, io_mem_notdirty;
-static MemoryRegion io_mem_unassigned, io_mem_subpage_ram;
+static MemoryRegion io_mem_unassigned;
 
 #endif
 
@@ -95,11 +95,13 @@ struct AddressSpaceDispatch {
      */
     PhysPageEntry phys_map;
     MemoryListener listener;
+    AddressSpace *as;
 };
 
 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
 typedef struct subpage_t {
     MemoryRegion iomem;
+    AddressSpace *as;
     hwaddr base;
     uint16_t sub_section[TARGET_PAGE_SIZE];
 } subpage_t;
@@ -729,7 +731,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
 
 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                              uint16_t section);
-static subpage_t *subpage_init(hwaddr base);
+static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
 static void destroy_page_desc(uint16_t section_index)
 {
     MemoryRegionSection *section = &phys_sections[section_index];
@@ -806,7 +808,7 @@ static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *secti
     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
 
     if (!(existing->mr->subpage)) {
-        subpage = subpage_init(base);
+        subpage = subpage_init(d->as, base);
         subsection.mr = &subpage->iomem;
         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                       phys_section_add(&subsection));
@@ -1569,60 +1571,64 @@ static const MemoryRegionOps watch_mem_ops = {
 static uint64_t subpage_read(void *opaque, hwaddr addr,
                              unsigned len)
 {
-    subpage_t *mmio = opaque;
-    unsigned int idx = SUBPAGE_IDX(addr);
-    uint64_t val;
+    subpage_t *subpage = opaque;
+    uint8_t buf[4];
 
-    MemoryRegionSection *section;
 #if defined(DEBUG_SUBPAGE)
-    printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
-           mmio, len, addr, idx);
+    printf("%s: subpage %p len %d addr " TARGET_FMT_plx "\n", __func__,
+           subpage, len, addr);
 #endif
-
-    section = &phys_sections[mmio->sub_section[idx]];
-    addr += mmio->base;
-    addr -= section->offset_within_address_space;
-    addr += section->offset_within_region;
-    io_mem_read(section->mr, addr, &val, len);
-    return val;
+    address_space_read(subpage->as, addr + subpage->base, buf, len);
+    switch (len) {
+    case 1:
+        return ldub_p(buf);
+    case 2:
+        return lduw_p(buf);
+    case 4:
+        return ldl_p(buf);
+    default:
+        abort();
+    }
 }
 
 static void subpage_write(void *opaque, hwaddr addr,
                           uint64_t value, unsigned len)
 {
-    subpage_t *mmio = opaque;
-    unsigned int idx = SUBPAGE_IDX(addr);
-    MemoryRegionSection *section;
+    subpage_t *subpage = opaque;
+    uint8_t buf[4];
+
 #if defined(DEBUG_SUBPAGE)
     printf("%s: subpage %p len %d addr " TARGET_FMT_plx
-           " idx %d value %"PRIx64"\n",
-           __func__, mmio, len, addr, idx, value);
+           " value %"PRIx64"\n",
+           __func__, subpage, len, addr, value);
 #endif
-
-    section = &phys_sections[mmio->sub_section[idx]];
-    addr += mmio->base;
-    addr -= section->offset_within_address_space;
-    addr += section->offset_within_region;
-    io_mem_write(section->mr, addr, value, len);
+    switch (len) {
+    case 1:
+        stb_p(buf, value);
+        break;
+    case 2:
+        stw_p(buf, value);
+        break;
+    case 4:
+        stl_p(buf, value);
+        break;
+    default:
+        abort();
+    }
+    address_space_write(subpage->as, addr + subpage->base, buf, len);
 }
 
 static bool subpage_accepts(void *opaque, hwaddr addr,
                             unsigned size, bool is_write)
 {
-    subpage_t *mmio = opaque;
-    unsigned int idx = SUBPAGE_IDX(addr);
-    MemoryRegionSection *section;
+    subpage_t *subpage = opaque;
 #if defined(DEBUG_SUBPAGE)
-    printf("%s: subpage %p %c len %d addr " TARGET_FMT_plx
-           " idx %d\n", __func__, mmio,
-           is_write ? 'w' : 'r', len, addr, idx);
+    printf("%s: subpage %p %c len %d addr " TARGET_FMT_plx "\n",
+           __func__, subpage, is_write ? 'w' : 'r', len, addr);
 #endif
 
-    section = &phys_sections[mmio->sub_section[idx]];
-    addr += mmio->base;
-    addr -= section->offset_within_address_space;
-    addr += section->offset_within_region;
-    return memory_region_access_valid(section->mr, addr, size, is_write);
+    return address_space_access_valid(subpage->as, addr + subpage->base,
+                                      size, is_write);
 }
 
 static const MemoryRegionOps subpage_ops = {
@@ -1632,38 +1638,6 @@ static const MemoryRegionOps subpage_ops = {
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
-                                 unsigned size)
-{
-    ram_addr_t raddr = addr;
-    void *ptr = qemu_get_ram_ptr(raddr);
-    switch (size) {
-    case 1: return ldub_p(ptr);
-    case 2: return lduw_p(ptr);
-    case 4: return ldl_p(ptr);
-    default: abort();
-    }
-}
-
-static void subpage_ram_write(void *opaque, hwaddr addr,
-                              uint64_t value, unsigned size)
-{
-    ram_addr_t raddr = addr;
-    void *ptr = qemu_get_ram_ptr(raddr);
-    switch (size) {
-    case 1: return stb_p(ptr, value);
-    case 2: return stw_p(ptr, value);
-    case 4: return stl_p(ptr, value);
-    default: abort();
-    }
-}
-
-static const MemoryRegionOps subpage_ram_ops = {
-    .read = subpage_ram_read,
-    .write = subpage_ram_write,
-    .endianness = DEVICE_NATIVE_ENDIAN,
-};
-
 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                              uint16_t section)
 {
@@ -1677,11 +1651,6 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
     printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
            mmio, start, end, idx, eidx, memory);
 #endif
-    if (memory_region_is_ram(phys_sections[section].mr)) {
-        MemoryRegionSection new_section = phys_sections[section];
-        new_section.mr = &io_mem_subpage_ram;
-        section = phys_section_add(&new_section);
-    }
     for (; idx <= eidx; idx++) {
         mmio->sub_section[idx] = section;
     }
@@ -1689,12 +1658,13 @@ static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
     return 0;
 }
 
-static subpage_t *subpage_init(hwaddr base)
+static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
 {
     subpage_t *mmio;
 
     mmio = g_malloc0(sizeof(subpage_t));
 
+    mmio->as = as;
     mmio->base = base;
     memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
                           "subpage", TARGET_PAGE_SIZE);
@@ -1732,8 +1702,6 @@ static void io_mem_init(void)
                           "unassigned", UINT64_MAX);
     memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
                           "notdirty", UINT64_MAX);
-    memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
-                          "subpage-ram", UINT64_MAX);
     memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
                           "watch", UINT64_MAX);
 }
@@ -1823,6 +1791,7 @@ void address_space_init_dispatch(AddressSpace *as)
         .region_nop = mem_add,
         .priority = 0,
     };
+    d->as = as;
     as->dispatch = d;
     memory_listener_register(&d->listener, as);
 }
commit 90260c6c0960b2745d79455ceaa6cb50fb796e02
Author: Jan Kiszka <jan.kiszka at siemens.com>
Date:   Sun May 26 21:46:51 2013 +0200

    exec: Resolve subpages in one step except for IOTLB fills
    
    Except for the case of setting the IOTLB entry in TCG mode, we can avoid
    the subpage dispatching handlers and do the resolution directly on
    address_space_lookup_region. An IOTLB entry describes a full page, not
    only the region that the first access to a sub-divided page may return.
    
    This patch therefore introduces a special translation function,
    address_space_translate_for_iotlb, that avoids the subpage resolutions.
    In contrast, callers of the existing address_space_translate service
    will now always receive the terminal memory region section. This will be
    important for breaking the BQL and for enabling unaligned memory region.
    
    Signed-off-by: Jan Kiszka <jan.kiszka at siemens.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/cputlb.c b/cputlb.c
index 1230e9e..947f17c 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -256,8 +256,8 @@ void tlb_set_page(CPUArchState *env, target_ulong vaddr,
     }
 
     sz = size;
-    section = address_space_translate(&address_space_memory, paddr, &xlat, &sz,
-                                      false);
+    section = address_space_translate_for_iotlb(&address_space_memory, paddr,
+                                                &xlat, &sz);
     assert(sz >= TARGET_PAGE_SIZE);
 
 #if defined(DEBUG_TLB)
diff --git a/exec.c b/exec.c
index 9c6f1fe..a59abc7 100644
--- a/exec.c
+++ b/exec.c
@@ -97,6 +97,13 @@ struct AddressSpaceDispatch {
     MemoryListener listener;
 };
 
+#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
+typedef struct subpage_t {
+    MemoryRegion iomem;
+    hwaddr base;
+    uint16_t sub_section[TARGET_PAGE_SIZE];
+} subpage_t;
+
 static MemoryRegionSection *phys_sections;
 static unsigned phys_sections_nb, phys_sections_nb_alloc;
 static uint16_t phys_section_unassigned;
@@ -220,19 +227,28 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
 }
 
 static MemoryRegionSection *address_space_lookup_region(AddressSpace *as,
-                                                        hwaddr addr)
+                                                        hwaddr addr,
+                                                        bool resolve_subpage)
 {
-    return phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
+    MemoryRegionSection *section;
+    subpage_t *subpage;
+
+    section = phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
+    if (resolve_subpage && section->mr->subpage) {
+        subpage = container_of(section->mr, subpage_t, iomem);
+        section = &phys_sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
+    }
+    return section;
 }
 
-MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
-                                             hwaddr *xlat, hwaddr *plen,
-                                             bool is_write)
+static MemoryRegionSection *
+address_space_translate_internal(AddressSpace *as, hwaddr addr, hwaddr *xlat,
+                                 hwaddr *plen, bool resolve_subpage)
 {
     MemoryRegionSection *section;
     Int128 diff;
 
-    section = address_space_lookup_region(as, addr);
+    section = address_space_lookup_region(as, addr, resolve_subpage);
     /* Compute offset within MemoryRegionSection */
     addr -= section->offset_within_address_space;
 
@@ -243,6 +259,20 @@ MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
     *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
     return section;
 }
+
+MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
+                                             hwaddr *xlat, hwaddr *plen,
+                                             bool is_write)
+{
+    return address_space_translate_internal(as, addr, xlat, plen, true);
+}
+
+MemoryRegionSection *
+address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
+                                  hwaddr *plen)
+{
+    return address_space_translate_internal(as, addr, xlat, plen, false);
+}
 #endif
 
 void cpu_exec_init_all(void)
@@ -697,13 +727,6 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
 
 #if !defined(CONFIG_USER_ONLY)
 
-#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
-typedef struct subpage_t {
-    MemoryRegion iomem;
-    hwaddr base;
-    uint16_t sub_section[TARGET_PAGE_SIZE];
-} subpage_t;
-
 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                              uint16_t section);
 static subpage_t *subpage_init(hwaddr base);
diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h
index e821660..e21cb60 100644
--- a/include/exec/cputlb.h
+++ b/include/exec/cputlb.h
@@ -32,6 +32,10 @@ extern int tlb_flush_count;
 
 /* exec.c */
 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr);
+
+MemoryRegionSection *
+address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
+                                  hwaddr *plen);
 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
                                        MemoryRegionSection *section,
                                        target_ulong vaddr,
commit f52cc467426e43792eb39f81705766bcb3d9e96a
Author: Jan Kiszka <jan.kiszka at siemens.com>
Date:   Sun May 26 21:42:40 2013 +0200

    exec: Allow unaligned address_space_rw
    
    This will be needed for some corner cases with para-virtual I/O ports.
    
    Signed-off-by: Jan Kiszka <jan.kiszka at siemens.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/exec.c b/exec.c
index 510d01b..9c6f1fe 100644
--- a/exec.c
+++ b/exec.c
@@ -1913,12 +1913,12 @@ static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
     return false;
 }
 
-static inline int memory_access_size(int l, hwaddr addr)
+static inline int memory_access_size(MemoryRegion *mr, int l, hwaddr addr)
 {
-    if (l >= 4 && ((addr & 3) == 0)) {
+    if (l >= 4 && (((addr & 3) == 0 || mr->ops->impl.unaligned))) {
         return 4;
     }
-    if (l >= 2 && ((addr & 1) == 0)) {
+    if (l >= 2 && (((addr & 1) == 0) || mr->ops->impl.unaligned)) {
         return 2;
     }
     return 1;
@@ -1940,7 +1940,7 @@ bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
 
         if (is_write) {
             if (!memory_access_is_direct(section->mr, is_write)) {
-                l = memory_access_size(l, addr1);
+                l = memory_access_size(section->mr, l, addr1);
                 /* XXX: could force cpu_single_env to NULL to avoid
                    potential bugs */
                 if (l == 4) {
@@ -1966,7 +1966,7 @@ bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
         } else {
             if (!memory_access_is_direct(section->mr, is_write)) {
                 /* I/O case */
-                l = memory_access_size(l, addr1);
+                l = memory_access_size(section->mr, l, addr1);
                 if (l == 4) {
                     /* 32 bit read access */
                     error |= io_mem_read(section->mr, addr1, &val, 4);
@@ -2097,7 +2097,7 @@ bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_
         l = len;
         section = address_space_translate(as, addr, &xlat, &l, is_write);
         if (!memory_access_is_direct(section->mr, is_write)) {
-            l = memory_access_size(l, addr);
+            l = memory_access_size(section->mr, l, addr);
             if (!memory_region_access_valid(section->mr, xlat, l, is_write)) {
                 return false;
             }
commit 1db8abb10243abe969a2ba307664ba51b60fcac6
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue May 21 12:07:21 2013 +0200

    memory: move private types to exec.c
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/exec.c b/exec.c
index 86efed7..510d01b 100644
--- a/exec.c
+++ b/exec.c
@@ -81,6 +81,22 @@ int use_icount;
 
 #if !defined(CONFIG_USER_ONLY)
 
+typedef struct PhysPageEntry PhysPageEntry;
+
+struct PhysPageEntry {
+    uint16_t is_leaf : 1;
+     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
+    uint16_t ptr : 15;
+};
+
+struct AddressSpaceDispatch {
+    /* This is a multi-level map on the physical address space.
+     * The bottom level has pointers to MemoryRegionSections.
+     */
+    PhysPageEntry phys_map;
+    MemoryListener listener;
+};
+
 static MemoryRegionSection *phys_sections;
 static unsigned phys_sections_nb, phys_sections_nb_alloc;
 static uint16_t phys_section_unassigned;
diff --git a/include/exec/memory-internal.h b/include/exec/memory-internal.h
index 799c02a..26689fe 100644
--- a/include/exec/memory-internal.h
+++ b/include/exec/memory-internal.h
@@ -22,24 +22,9 @@
 #ifndef CONFIG_USER_ONLY
 #include "hw/xen/xen.h"
 
-typedef struct PhysPageEntry PhysPageEntry;
-
-struct PhysPageEntry {
-    uint16_t is_leaf : 1;
-     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
-    uint16_t ptr : 15;
-};
 
 typedef struct AddressSpaceDispatch AddressSpaceDispatch;
 
-struct AddressSpaceDispatch {
-    /* This is a multi-level map on the physical address space.
-     * The bottom level has pointers to MemoryRegionSections.
-     */
-    PhysPageEntry phys_map;
-    MemoryListener listener;
-};
-
 void address_space_init_dispatch(AddressSpace *as);
 void address_space_destroy_dispatch(AddressSpace *as);
 
commit 9f029603abb1472a33e008f9ea4127cc52566986
Author: Jan Kiszka <jan.kiszka at siemens.com>
Date:   Mon May 6 16:48:02 2013 +0200

    memory: Introduce address_space_lookup_region
    
    This introduces a wrapper for phys_page_find (before we complicate
    address_space_translate with IOMMU translation).  This function will
    also encapsulate locking and reference counting when we introduce
    BQL-free dispatching.
    
    Signed-off-by: Jan Kiszka <jan.kiszka at siemens.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/exec.c b/exec.c
index eb200d0..86efed7 100644
--- a/exec.c
+++ b/exec.c
@@ -203,6 +203,12 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
         && mr != &io_mem_watch;
 }
 
+static MemoryRegionSection *address_space_lookup_region(AddressSpace *as,
+                                                        hwaddr addr)
+{
+    return phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
+}
+
 MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
                                              hwaddr *xlat, hwaddr *plen,
                                              bool is_write)
@@ -210,7 +216,7 @@ MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
     MemoryRegionSection *section;
     Int128 diff;
 
-    section = phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS);
+    section = address_space_lookup_region(as, addr);
     /* Compute offset within MemoryRegionSection */
     addr -= section->offset_within_address_space;
 
commit 3752a0364883ed00100cc537832c434de3f77931
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Thu Jun 20 15:18:04 2013 +0100

    exec.c: address_space_translate: handle access to addr 0 of 2^64 sized region
    
    The memory API allows a MemoryRegion's size to be 2^64, as a special
    case (otherwise the size always fits in a 64 bit integer). This meant
    that attempts to access address zero in a 2^64 sized region would
    assert in address_space_translate():
    
      #3  0x00007ffff3e4d192 in __GI___assert_fail#(assertion=0x555555a43f32
        "!a.hi", file=0x555555a43ef0 "include/qemu/int128.h", line=18,
        function=0x555555a4439f "int128_get64") at assert.c:103
      #4  0x0000555555877642 in int128_get64 (a=...)
        at include/qemu/int128.h:18
      #5  0x00005555558782f2 in address_space_translate (as=0x55555668d140,
       /addr=0, xlat=0x7fffafac9918, plen=0x7fffafac9920, is_write=false)
        at exec.c:221
    
    Fix this by doing the 'min' operation in 128 bit arithmetic
    rather than 64 bit arithmetic (we know the result of the 'min'
    definitely fits in 64 bits because one of the inputs did).
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/exec.c b/exec.c
index 5b8b40d..eb200d0 100644
--- a/exec.c
+++ b/exec.c
@@ -218,7 +218,7 @@ MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr,
     *xlat = addr + section->offset_within_region;
 
     diff = int128_sub(section->mr->size, int128_make64(addr));
-    *plen = MIN(int128_get64(diff), *plen);
+    *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
     return section;
 }
 #endif
commit 4eda32f588086b6cd0ec2be6a7a6c131f8c2b427
Author: Markus Armbruster <armbru at redhat.com>
Date:   Fri Jun 14 13:15:06 2013 +0200

    doc: Drop ref to Bochs from -no-fd-bootchk documentation
    
    Manual page and qemu-doc on talk about "Bochs BIOS".  We use SeaBIOS,
    and it implements the feature.  Replace by just "BIOS", and drop the
    TODO line wondering about the Bochs reference.
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Reviewed-by: Anthony Liguori <aliguori at us.ibm.com>
    Message-id: 1371208516-7857-7-git-send-email-armbru at redhat.com
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/qemu-options.hx b/qemu-options.hx
index bf94862..8355f9b 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1268,9 +1268,8 @@ DEF("no-fd-bootchk", 0, QEMU_OPTION_no_fd_bootchk,
 STEXI
 @item -no-fd-bootchk
 @findex -no-fd-bootchk
-Disable boot signature checking for floppy disks in Bochs BIOS. It may
+Disable boot signature checking for floppy disks in BIOS. May
 be needed to boot from old floppy disks.
-TODO: check reference to Bochs BIOS.
 ETEXI
 
 DEF("no-acpi", 0, QEMU_OPTION_no_acpi,
commit e1123015a50abf44f9daa9495f40eeaaf4c9bb98
Author: Markus Armbruster <armbru at redhat.com>
Date:   Fri Jun 14 13:15:05 2013 +0200

    pc: Make -no-fd-bootchk stick across boot order changes
    
    Option -no-fd-bootchk asks the BIOS to attempt booting from a floppy
    even when the boot sector signature isn't there, by setting a bit in
    RTC CMOS.  It was added back in 2006 (commit 52ca8d6a).
    
    Two years later, commit 0ecdffbb added monitor command boot_set.
    Implemented by new function pc_boot_set().  It unconditionally clears
    the floppy signature bit in CMOS.
    
    Commit e0f084bf added -boot option once to automatically change the
    boot order on first reset.  Reuses pc_boot_set(), thus also clears the
    floppy signature bit.  Commit d9346e81 took care to preserve this
    behavior.
    
    Thus, -no-fd-bootchk applies to any number of boots.  Except it
    applies just to the first boot with -boot once, and never after
    boot_set.  Weird.  Make it stick instead: set the bit according to
    -no-fd-bootchk in pc_boot_set().
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Reviewed-by: Anthony Liguori <aliguori at us.ibm.com>
    Message-id: 1371208516-7857-6-git-send-email-armbru at redhat.com
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index e0fbb86..5e8f143 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -266,7 +266,7 @@ static int boot_device2nibble(char boot_device)
     return 0;
 }
 
-static int set_boot_dev(ISADevice *s, const char *boot_device, int fd_bootchk)
+static int set_boot_dev(ISADevice *s, const char *boot_device)
 {
 #define PC_MAX_BOOT_DEVICES 3
     int nbds, bds[3] = { 0, };
@@ -292,7 +292,7 @@ static int set_boot_dev(ISADevice *s, const char *boot_device, int fd_bootchk)
 
 static int pc_boot_set(void *opaque, const char *boot_device)
 {
-    return set_boot_dev(opaque, boot_device, 0);
+    return set_boot_dev(opaque, boot_device);
 }
 
 typedef struct pc_cmos_init_late_arg {
@@ -407,8 +407,7 @@ void pc_cmos_init(ram_addr_t ram_size, ram_addr_t above_4g_mem_size,
     cpu_hotplug_cb.cpu_added_notifier.notify = rtc_notify_cpu_added;
     qemu_register_cpu_added_notifier(&cpu_hotplug_cb.cpu_added_notifier);
 
-    /* set boot devices, and disable floppy signature check if requested */
-    if (set_boot_dev(s, boot_device, fd_bootchk)) {
+    if (set_boot_dev(s, boot_device)) {
         exit(1);
     }
 
commit 083b79c9fea0e3842f0b2b57ff0d20ab5f57084a
Author: Markus Armbruster <armbru at redhat.com>
Date:   Fri Jun 14 13:15:04 2013 +0200

    vl: Rename *boot_devices to *boot_order, for consistency
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Reviewed-by: Anthony Liguori <aliguori at us.ibm.com>
    Message-id: 1371208516-7857-5-git-send-email-armbru at redhat.com
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/include/hw/hw.h b/include/hw/hw.h
index 1fb9afa..cc9f847 100644
--- a/include/hw/hw.h
+++ b/include/hw/hw.h
@@ -44,9 +44,9 @@ void qemu_unregister_reset(QEMUResetHandler *func, void *opaque);
 
 /* handler to set the boot_device order for a specific type of QEMUMachine */
 /* return 0 if success */
-typedef int QEMUBootSetHandler(void *opaque, const char *boot_devices);
+typedef int QEMUBootSetHandler(void *opaque, const char *boot_order);
 void qemu_register_boot_set(QEMUBootSetHandler *func, void *opaque);
-int qemu_boot_set(const char *boot_devices);
+int qemu_boot_set(const char *boot_order);
 
 #ifdef NEED_CPU_H
 #if TARGET_LONG_BITS == 64
diff --git a/vl.c b/vl.c
index ea4e1ef..767e020 100644
--- a/vl.c
+++ b/vl.c
@@ -1149,12 +1149,12 @@ void qemu_register_boot_set(QEMUBootSetHandler *func, void *opaque)
     boot_set_opaque = opaque;
 }
 
-int qemu_boot_set(const char *boot_devices)
+int qemu_boot_set(const char *boot_order)
 {
     if (!boot_set_handler) {
         return -EINVAL;
     }
-    return boot_set_handler(boot_set_opaque, boot_devices);
+    return boot_set_handler(boot_set_opaque, boot_order);
 }
 
 static void validate_bootdevices(const char *devices)
@@ -1185,9 +1185,9 @@ static void validate_bootdevices(const char *devices)
     }
 }
 
-static void restore_boot_devices(void *opaque)
+static void restore_boot_order(void *opaque)
 {
-    char *standard_boot_devices = opaque;
+    char *normal_boot_order = opaque;
     static int first = 1;
 
     /* Restore boot order and remove ourselves after the first boot */
@@ -1196,10 +1196,10 @@ static void restore_boot_devices(void *opaque)
         return;
     }
 
-    qemu_boot_set(standard_boot_devices);
+    qemu_boot_set(normal_boot_order);
 
-    qemu_unregister_reset(restore_boot_devices, standard_boot_devices);
-    g_free(standard_boot_devices);
+    qemu_unregister_reset(restore_boot_order, normal_boot_order);
+    g_free(normal_boot_order);
 }
 
 void add_boot_device_path(int32_t bootindex, DeviceState *dev,
@@ -4093,7 +4093,7 @@ int main(int argc, char **argv, char **envp)
             validate_bootdevices(once);
             normal_boot_order = g_strdup(boot_order);
             boot_order = once;
-            qemu_register_reset(restore_boot_devices, normal_boot_order);
+            qemu_register_reset(restore_boot_order, normal_boot_order);
         }
 
         boot_menu = qemu_opt_get_bool(opts, "menu", boot_menu);
commit 8281abd548d840d84223e66812491918c713e56c
Author: Markus Armbruster <armbru at redhat.com>
Date:   Fri Jun 14 13:15:03 2013 +0200

    vl: Fix -boot order and once regressions, and related bugs
    
    Option "once" sets up a different boot order just for the initial
    boot.  Boot order reverts back to normal on reset.  Option "order"
    changes the normal boot order.
    
    The reversal is implemented by reset handler restore_boot_devices(),
    which takes the boot order to revert to as argument.
    restore_boot_devices() does nothing on its first call, because that
    must be the initial machine reset.  On its second call, it changes the
    boot order back, and unregisters itself.
    
    Because we register the handler right when -boot gets parsed, we can
    revert to an incorrect normal boot order, and multiple -boot can
    interact in funny ways.
    
    Here's how things work without -boot once or order:
    
    * boot_devices is "".
    
    * main() passes machine->boot_order to to machine->init(), because
      boot_devices is "".  machine->init() configures firmware
      accordingly.  For PC machines, machine->boot_order is "cad", and
      pc_cmos_init() writes it to RTC CMOS, where SeaBIOS picks it up.
    
    Now consider -boot order=:
    
    * boot_devices is "".
    
    * -boot order= sets boot_devices to "" (no change).
    
    * main() passes machine->boot_order to to machine->init(), because
      boot_devices is "", as above.
    
      Bug: -boot order= has no effect.  Broken in commit e4ada29e.
    
    Next, consider -boot once=a:
    
    * boot_devices is "".
    
    * -boot once=a registers restore_boot_devices() with argument "", and
      sets boot_devices to "a".
    
    * main() passes boot_devices "a" to machine->init(), which configures
      firmware accordingly.  For PC machines, pc_cmos_init() writes the
      boot order to RTC CMOS.
    
    * main() calls qemu_system_reset().  This runs reset handlers.
    
      - restore_boot_devices() gets called with argument "".  Does
        nothing, because it's the first call.
    
    * Machine boots, boot order is "a".
    
    * Machine resets (e.g. monitor command).  Reset handlers run.
    
      - restore_boot_devices() gets called with argument "".  Calls
        qemu_boot_set("") to reconfigure firmware.  For PC machines,
        pc_boot_set() writes it into RTC CMOS.  Reset handler
        unregistered.
    
        Bug: boot order reverts to "" instead of machine->boot_order.  The
        actual boot order depends on how firmware interprets "".  Broken
        in commit e4ada29e.
    
    Next, consider -boot once=a -boot order=c:
    
    * boot_devices is "".
    
    * -boot once=a registers restore_boot_devices() with argument "", and
      sets boot_devices to "a".
    
    * -boot order=c sets boot_devices to "c".
    
    * main() passes boot_devices "c" to machine->init(), which configures
      firmware accordingly.  For PC machines, pc_cmos_init() writes the
      boot order to RTC CMOS.
    
    * main() calls qemu_system_reset().  This runs reset handlers.
    
      - restore_boot_devices() gets called with argument "".  Does
        nothing, because it's the first call.
    
    * Machine boots, boot order is "c".
    
      Bug: it should be "a".  I figure this has always been broken.
    
    * Machine resets (e.g. monitor command).  Reset handlers run.
    
      - restore_boot_devices() gets called with argument "".  Calls
        qemu_boot_set("") to reconfigure firmware.  For PC machines,
        pc_boot_set() writes it into RTC CMOS.  Reset handler
        unregistered.
    
        Bug: boot order reverts to "" instead of "c".  I figure this has
        always been broken, just differently broken before commit
        e4ada29e.
    
    Next, consider -boot once=a -boot once=b -boot once=c:
    
    * boot_devices is "".
    
    * -boot once=a registers restore_boot_devices() with argument "", and
      sets boot_devices to "a".
    
    * -boot once=b registers restore_boot_devices() with argument "a", and
      sets boot_devices to "b".
    
    * -boot once=c registers restore_boot_devices() with argument "b", and
      sets boot_devices to "c".
    
    * main() passes boot_devices "c" to machine->init(), which configures
      firmware accordingly.  For PC machines, pc_cmos_init() writes the
      boot order to RTC CMOS.
    
    * main() calls qemu_system_reset().  This runs reset handlers.
    
      - restore_boot_devices() gets called with argument "".  Does
        nothing, because it's the first call.
    
      - restore_boot_devices() gets called with argument "a".  Calls
        qemu_boot_set("a") to reconfigure firmware.  For PC machines,
        pc_boot_set() writes it into RTC CMOS.  Reset handler
        unregistered.
    
      - restore_boot_devices() gets called with argument "b".  Calls
        qemu_boot_set("b") to reconfigure firmware.  For PC machines,
        pc_boot_set() writes it into RTC CMOS.  Reset handler
        unregistered.
    
    * Machine boots, boot order is "b".
    
      Bug: should really be "c", because that came last, and for all other
      -boot options, the last one wins.  I figure this was broken some
      time before commit 37905d6a, and fixed there only for a single
      occurence of "once".
    
    * Machine resets (e.g. monitor command).  Reset handlers run.
    
      - restore_boot_devices() gets called with argument "".  Calls
        qemu_boot_set("") to reconfigure firmware.  For PC machines,
        pc_boot_set() writes it into RTC CMOS.  Reset handler
        unregistered.
    
        Same bug as above: boot order reverts to "" instead of
        machine->boot_order.
    
    Fix by acting upon -boot options order, once and menu only after
    option parsing is complete, and the machine is known.  This is how the
    other -boot options work already.
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Reviewed-by: Anthony Liguori <aliguori at us.ibm.com>
    Message-id: 1371208516-7857-4-git-send-email-armbru at redhat.com
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/vl.c b/vl.c
index ed92463..ea4e1ef 100644
--- a/vl.c
+++ b/vl.c
@@ -2841,7 +2841,7 @@ int main(int argc, char **argv, char **envp)
     const char *icount_option = NULL;
     const char *initrd_filename;
     const char *kernel_filename, *kernel_cmdline;
-    char boot_devices[33] = "";
+    const char *boot_order = NULL;
     DisplayState *ds;
     int cyls, heads, secs, translation;
     QemuOpts *hda_opts = NULL, *opts, *machine_opts;
@@ -3131,31 +3131,9 @@ int main(int argc, char **argv, char **envp)
                 drive_add(IF_DEFAULT, 2, optarg, CDROM_OPTS);
                 break;
             case QEMU_OPTION_boot:
-                {
-                    char *standard_boot_devices;
-                    const char *order, *once;
-
-                    opts = qemu_opts_parse(qemu_find_opts("boot-opts"),
-                                           optarg, 1);
-                    if (!opts) {
-                        exit(1);
-                    }
-
-                    order = qemu_opt_get(opts, "order");
-                    if (order) {
-                        validate_bootdevices(order);
-                        pstrcpy(boot_devices, sizeof(boot_devices), order);
-                    }
-
-                    once = qemu_opt_get(opts, "once");
-                    if (once) {
-                        validate_bootdevices(once);
-                        standard_boot_devices = g_strdup(boot_devices);
-                        pstrcpy(boot_devices, sizeof(boot_devices), once);
-                        qemu_register_reset(restore_boot_devices,
-                                            standard_boot_devices);
-                    }
-                    boot_menu = qemu_opt_get_bool(opts, "menu", boot_menu);
+                opts = qemu_opts_parse(qemu_find_opts("boot-opts"), optarg, 1);
+                if (!opts) {
+                    exit(1);
                 }
                 break;
             case QEMU_OPTION_fda:
@@ -4096,6 +4074,31 @@ int main(int argc, char **argv, char **envp)
         kernel_filename = initrd_filename = kernel_cmdline = NULL;
     }
 
+    if (!boot_order) {
+        boot_order = machine->boot_order;
+    }
+    opts = qemu_opts_find(qemu_find_opts("boot-opts"), NULL);
+    if (opts) {
+        char *normal_boot_order;
+        const char *order, *once;
+
+        order = qemu_opt_get(opts, "order");
+        if (order) {
+            validate_bootdevices(order);
+            boot_order = order;
+        }
+
+        once = qemu_opt_get(opts, "once");
+        if (once) {
+            validate_bootdevices(once);
+            normal_boot_order = g_strdup(boot_order);
+            boot_order = once;
+            qemu_register_reset(restore_boot_devices, normal_boot_order);
+        }
+
+        boot_menu = qemu_opt_get_bool(opts, "menu", boot_menu);
+    }
+
     if (!kernel_cmdline) {
         kernel_cmdline = "";
     }
@@ -4260,9 +4263,7 @@ int main(int argc, char **argv, char **envp)
     qdev_machine_init();
 
     QEMUMachineInitArgs args = { .ram_size = ram_size,
-                                 .boot_device = (boot_devices[0] == '\0') ?
-                                                machine->boot_order :
-                                                boot_devices,
+                                 .boot_device = boot_order,
                                  .kernel_filename = kernel_filename,
                                  .kernel_cmdline = kernel_cmdline,
                                  .initrd_filename = initrd_filename,
commit a86b35f992f107323e432c0a96107e11e1b699ad
Author: Markus Armbruster <armbru at redhat.com>
Date:   Fri Jun 14 13:15:02 2013 +0200

    qemu-option: check_params() is now unused, drop it
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Reviewed-by: Anthony Liguori <aliguori at us.ibm.com>
    Message-id: 1371208516-7857-3-git-send-email-armbru at redhat.com
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/include/qemu/option.h b/include/qemu/option.h
index bdb6d21..a83c700 100644
--- a/include/qemu/option.h
+++ b/include/qemu/option.h
@@ -55,8 +55,6 @@ int get_next_param_value(char *buf, int buf_size,
                          const char *tag, const char **pstr);
 int get_param_value(char *buf, int buf_size,
                     const char *tag, const char *str);
-int check_params(char *buf, int buf_size,
-                 const char * const *params, const char *str);
 
 
 /*
diff --git a/util/qemu-option.c b/util/qemu-option.c
index 8b74bf1..412c425 100644
--- a/util/qemu-option.c
+++ b/util/qemu-option.c
@@ -123,36 +123,6 @@ int get_param_value(char *buf, int buf_size,
     return get_next_param_value(buf, buf_size, tag, &str);
 }
 
-int check_params(char *buf, int buf_size,
-                 const char * const *params, const char *str)
-{
-    const char *p;
-    int i;
-
-    p = str;
-    while (*p != '\0') {
-        p = get_opt_name(buf, buf_size, p, '=');
-        if (*p != '=') {
-            return -1;
-        }
-        p++;
-        for (i = 0; params[i] != NULL; i++) {
-            if (!strcmp(params[i], buf)) {
-                break;
-            }
-        }
-        if (params[i] == NULL) {
-            return -1;
-        }
-        p = get_opt_value(NULL, 0, p);
-        if (*p != ',') {
-            break;
-        }
-        p++;
-    }
-    return 0;
-}
-
 /*
  * Searches an option list for an option with the given name
  */
commit 6ef4716cecdfa1b3794c1a33edba9840e1aa6b5f
Author: Markus Armbruster <armbru at redhat.com>
Date:   Fri Jun 14 13:15:01 2013 +0200

    vl: Clean up parsing of -boot option argument
    
    Commit 3d3b8303 threw in some QemuOpts parsing without replacing the
    existing ad hoc parser, resulting in a confusing mess.  Clean it up.
    
    Two user-visible changes:
    
    1. Invalid options are reported more nicely.  Before:
    
            qemu: unknown boot parameter 'x' in 'x=y'
    
       After:
    
            qemu-system-x86_64: -boot x=y: Invalid parameter 'x'
    
    2. If -boot is given multiple times, options accumulate, just like for
       -machine.  Before, only options order, once and menu accumulated.
       For the other ones, all but the first -boot in non-legacy syntax
       got simply ignored.
    
    Signed-off-by: Markus Armbruster <armbru at redhat.com>
    Reviewed-by: Anthony Liguori <aliguori at us.ibm.com>
    Message-id: 1371208516-7857-2-git-send-email-armbru at redhat.com
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/vl.c b/vl.c
index f94ec9c..ed92463 100644
--- a/vl.c
+++ b/vl.c
@@ -434,9 +434,10 @@ static QemuOptsList qemu_machine_opts = {
 
 static QemuOptsList qemu_boot_opts = {
     .name = "boot-opts",
+    .implied_opt_name = "order",
+    .merge_lists = true,
     .head = QTAILQ_HEAD_INITIALIZER(qemu_boot_opts.head),
     .desc = {
-        /* the three names below are not used now */
         {
             .name = "order",
             .type = QEMU_OPT_STRING,
@@ -445,8 +446,7 @@ static QemuOptsList qemu_boot_opts = {
             .type = QEMU_OPT_STRING,
         }, {
             .name = "menu",
-            .type = QEMU_OPT_STRING,
-        /* following are really used */
+            .type = QEMU_OPT_BOOL,
         }, {
             .name = "splash",
             .type = QEMU_OPT_STRING,
@@ -1157,7 +1157,7 @@ int qemu_boot_set(const char *boot_devices)
     return boot_set_handler(boot_set_opaque, boot_devices);
 }
 
-static void validate_bootdevices(char *devices)
+static void validate_bootdevices(const char *devices)
 {
     /* We just do some generic consistency checks */
     const char *p;
@@ -3132,70 +3132,30 @@ int main(int argc, char **argv, char **envp)
                 break;
             case QEMU_OPTION_boot:
                 {
-                    static const char * const params[] = {
-                        "order", "once", "menu",
-                        "splash", "splash-time",
-                        "reboot-timeout", "strict", NULL
-                    };
-                    char buf[sizeof(boot_devices)];
                     char *standard_boot_devices;
-                    int legacy = 0;
-
-                    if (!strchr(optarg, '=')) {
-                        legacy = 1;
-                        pstrcpy(buf, sizeof(buf), optarg);
-                    } else if (check_params(buf, sizeof(buf), params, optarg) < 0) {
-                        fprintf(stderr,
-                                "qemu: unknown boot parameter '%s' in '%s'\n",
-                                buf, optarg);
+                    const char *order, *once;
+
+                    opts = qemu_opts_parse(qemu_find_opts("boot-opts"),
+                                           optarg, 1);
+                    if (!opts) {
                         exit(1);
                     }
 
-                    if (legacy ||
-                        get_param_value(buf, sizeof(buf), "order", optarg)) {
-                        validate_bootdevices(buf);
-                        pstrcpy(boot_devices, sizeof(boot_devices), buf);
+                    order = qemu_opt_get(opts, "order");
+                    if (order) {
+                        validate_bootdevices(order);
+                        pstrcpy(boot_devices, sizeof(boot_devices), order);
                     }
-                    if (!legacy) {
-                        if (get_param_value(buf, sizeof(buf),
-                                            "once", optarg)) {
-                            validate_bootdevices(buf);
-                            standard_boot_devices = g_strdup(boot_devices);
-                            pstrcpy(boot_devices, sizeof(boot_devices), buf);
-                            qemu_register_reset(restore_boot_devices,
-                                                standard_boot_devices);
-                        }
-                        if (get_param_value(buf, sizeof(buf),
-                                            "menu", optarg)) {
-                            if (!strcmp(buf, "on")) {
-                                boot_menu = 1;
-                            } else if (!strcmp(buf, "off")) {
-                                boot_menu = 0;
-                            } else {
-                                fprintf(stderr,
-                                        "qemu: invalid option value '%s'\n",
-                                        buf);
-                                exit(1);
-                            }
-                        }
-                        if (get_param_value(buf, sizeof(buf),
-                                            "strict", optarg)) {
-                            if (!strcmp(buf, "on")) {
-                                boot_strict = true;
-                            } else if (!strcmp(buf, "off")) {
-                                boot_strict = false;
-                            } else {
-                                fprintf(stderr,
-                                        "qemu: invalid option value '%s'\n",
-                                        buf);
-                                exit(1);
-                            }
-                        }
-                        if (!qemu_opts_parse(qemu_find_opts("boot-opts"),
-                                             optarg, 0)) {
-                            exit(1);
-                        }
+
+                    once = qemu_opt_get(opts, "once");
+                    if (once) {
+                        validate_bootdevices(once);
+                        standard_boot_devices = g_strdup(boot_devices);
+                        pstrcpy(boot_devices, sizeof(boot_devices), once);
+                        qemu_register_reset(restore_boot_devices,
+                                            standard_boot_devices);
                     }
+                    boot_menu = qemu_opt_get_bool(opts, "menu", boot_menu);
                 }
                 break;
             case QEMU_OPTION_fda:
commit b5a87d26e848945eb891f4d7e4a7f2be514e08d5
Author: Gertjan Halkes <qemu at ghalkes.nl>
Date:   Fri Nov 11 16:04:20 2011 +0100

    make user networking hostfwd work with restrict=y
    
    This patch allows the hostfwd option to override the restrict=y setting in
    the user network stack, as explicitly stated in the documentation on the
    restrict option:
    
         restrict=on|off
             If this option is enabled, the guest will be isolated, i.e. it
             will not be able to contact the host and no guest IP packets
             will be routed over the host to the outside. This option does
             not affect any explicitly set forwarding rules.
    
    Qemu bug tracker:
    https://bugs.launchpad.net/qemu/+bug/829455
    
    Signed-off-by: Gertjan Halkes <qemu at ghalkes.nl>
    Signed-off-by: Jan Kiszka <jan.kiszka at siemens.com>

diff --git a/slirp/tcp_input.c b/slirp/tcp_input.c
index 6440eae..f946db8 100644
--- a/slirp/tcp_input.c
+++ b/slirp/tcp_input.c
@@ -316,16 +316,6 @@ tcp_input(struct mbuf *m, int iphlen, struct socket *inso)
 	m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
 	m->m_len  -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
 
-    if (slirp->restricted) {
-        for (ex_ptr = slirp->exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) {
-            if (ex_ptr->ex_fport == ti->ti_dport &&
-                ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) {
-                break;
-            }
-        }
-        if (!ex_ptr)
-            goto drop;
-    }
 	/*
 	 * Locate pcb for segment.
 	 */
@@ -355,6 +345,22 @@ findso:
 	 * as if it was LISTENING, and continue...
 	 */
         if (so == NULL) {
+          if (slirp->restricted) {
+            /* Any hostfwds will have an existing socket, so we only get here
+             * for non-hostfwd connections. These should be dropped, unless it
+             * happens to be a guestfwd.
+             */
+            for (ex_ptr = slirp->exec_list; ex_ptr; ex_ptr = ex_ptr->ex_next) {
+                if (ex_ptr->ex_fport == ti->ti_dport &&
+                    ti->ti_dst.s_addr == ex_ptr->ex_addr.s_addr) {
+                    break;
+                }
+            }
+            if (!ex_ptr) {
+                goto dropwithreset;
+            }
+          }
+
 	  if ((tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) != TH_SYN)
 	    goto dropwithreset;
 
commit 68756ba8be2127b6ea30a466af9f78a5c97bc15f
Author: Bas van Sisseren <bas at quarantainenet.nl>
Date:   Mon Jun 3 15:11:49 2013 +0200

    fix -net user checks by reordering checks
    
    reorder slirp config options. first check the dns-server-address,
    then check the first-dhcp-address. the original code was comparing
    the first-dhcp-address with the default dns-server-address, not
    the configured dns-server-address.
    
    Signed-off-by: Bas van Sisseren <bas at quarantainenet.nl>
    Signed-off-by: Jan Kiszka <jan.kiszka at siemens.com>

diff --git a/net/slirp.c b/net/slirp.c
index b3f35d5..124e953 100644
--- a/net/slirp.c
+++ b/net/slirp.c
@@ -212,19 +212,19 @@ static int net_slirp_init(NetClientState *peer, const char *model,
         return -1;
     }
 
-    if (vdhcp_start && !inet_aton(vdhcp_start, &dhcp)) {
+    if (vnameserver && !inet_aton(vnameserver, &dns)) {
         return -1;
     }
-    if ((dhcp.s_addr & mask.s_addr) != net.s_addr ||
-        dhcp.s_addr == host.s_addr || dhcp.s_addr == dns.s_addr) {
+    if ((dns.s_addr & mask.s_addr) != net.s_addr ||
+        dns.s_addr == host.s_addr) {
         return -1;
     }
 
-    if (vnameserver && !inet_aton(vnameserver, &dns)) {
+    if (vdhcp_start && !inet_aton(vdhcp_start, &dhcp)) {
         return -1;
     }
-    if ((dns.s_addr & mask.s_addr) != net.s_addr ||
-        dns.s_addr == host.s_addr) {
+    if ((dhcp.s_addr & mask.s_addr) != net.s_addr ||
+        dhcp.s_addr == host.s_addr || dhcp.s_addr == dns.s_addr) {
         return -1;
     }
 
commit c1990468d5a13c8f1d9111fcca0a78a6adad062b
Author: Michael Roth <mdroth at linux.vnet.ibm.com>
Date:   Wed May 22 11:32:51 2013 -0500

    wdt_i6300esb: fix vmstate versioning
    
    When this VMSD was introduced it's version fields were set to
    sizeof(I6300State), making them essentially random from build to build,
    version to version.
    
    To fix this, we lock in a high version id and low minimum version id to
    support old->new migration from all prior versions of this device's
    state. This should work since the device state has not changed since
    its introduction.
    
    The potentially breaks migration from 1.5+ to 1.5, but since the
    versioning was essentially random prior to this patch, new->old
    migration was not consistently functional to begin with.
    
    Reported-by: Nicholas Thomas <nick at bytemark.co.uk>
    Suggested-by: Peter Maydell <peter.maydell at linaro.org>
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Michael Roth <mdroth at linux.vnet.ibm.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/hw/watchdog/wdt_i6300esb.c b/hw/watchdog/wdt_i6300esb.c
index 1407fba..05af0b1 100644
--- a/hw/watchdog/wdt_i6300esb.c
+++ b/hw/watchdog/wdt_i6300esb.c
@@ -374,9 +374,22 @@ static const MemoryRegionOps i6300esb_ops = {
 
 static const VMStateDescription vmstate_i6300esb = {
     .name = "i6300esb_wdt",
-    .version_id = sizeof(I6300State),
-    .minimum_version_id = sizeof(I6300State),
-    .minimum_version_id_old = sizeof(I6300State),
+    /* With this VMSD's introduction, version_id/minimum_version_id were
+     * erroneously set to sizeof(I6300State), causing a somewhat random
+     * version_id to be set for every build. This eventually broke
+     * migration.
+     *
+     * To correct this without breaking old->new migration for older versions
+     * of QEMU, we've set version_id to a value high enough to exceed all past
+     * values of sizeof(I6300State) across various build environments, and have
+     * reset minimum_version_id_old/minimum_version_id to 1, since this VMSD
+     * has never changed and thus can accept all past versions.
+     *
+     * For future changes we can treat these values as we normally would.
+     */
+    .version_id = 10000,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
     .fields      = (VMStateField []) {
         VMSTATE_PCI_DEVICE(dev, I6300State),
         VMSTATE_INT32(reboot_enabled, I6300State),
commit 3ed8a8430a896088db55ae029e9eac200aedbe5c
Merge: 7d8f4b3 1288844
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Tue Jun 18 10:06:47 2013 -0500

    Merge remote-tracking branch 'bonzini/scsi-next' into staging
    
    # By Paolo Bonzini (3) and others
    # Via Paolo Bonzini
    * bonzini/scsi-next:
      iscsi: reorganize iscsi_readcapacity_sync
      iscsi: simplify freeing of tasks
      vhost-scsi: fix k->set_guest_notifiers() NULL dereference
      scsi-disk: scsi-block device for scsi pass-through should not be removable
      scsi-generic: check the return value of bdrv_aio_ioctl in execute_command
      scsi-generic: fix sign extension of READ CAPACITY(10) data
      scsi: reset cdrom tray statuses on scsi_disk_reset
    
    Message-id: 1371565016-2643-1-git-send-email-pbonzini at redhat.com
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

commit 7d8f4b34a4bb6996d958da211f1dfb2686c7d82b
Merge: 5b45643 2330790
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Tue Jun 18 10:06:39 2013 -0500

    Merge remote-tracking branch 'bonzini/nbd-next' into staging
    
    # By Ján Tomko
    # Via Paolo Bonzini
    * bonzini/nbd-next:
      nbd: strip braces from literal IPv6 address in URI
      qemu-socket: allow hostnames starting with a digit

commit 5b456438f5bb395ed6b1eec95e18ce7a7a884a0a
Author: Cole Robinson <crobinso at redhat.com>
Date:   Fri May 31 14:12:48 2013 -0400

    virtio-rng: Fix crash with non-default backend
    
    'default_backend' isn't always set, but 'rng' is, so use that.
    
    $ ./x86_64-softmmu/qemu-system-x86_64 -object rng-random,id=rng0,filename=/dev/random -device virtio-rng-pci,rng=rng0
    Segmentation fault (core dumped)
    
    Regressed with virtio refactoring in 59ccd20a9ac719cff82180429458728f03ec612f
    
    CC: qemu-stable at nongnu.org
    Signed-off-by: Cole Robinson <crobinso at redhat.com>
    Acked-by: Amit Shah <amit.shah at redhat.com>
    Acked-by: Michael S. Tsirkin <mst at redhat.com>
    Tested-by: Michael Roth <mdroth at linux.vnet.ibm.com>
    Message-id: bf4505014a0a941dbd3c62068f3cf2c496b69e6a.1370023944.git.crobinso at redhat.com
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/hw/s390x/s390-virtio-bus.c b/hw/s390x/s390-virtio-bus.c
index a1cdfb0..207eb82 100644
--- a/hw/s390x/s390-virtio-bus.c
+++ b/hw/s390x/s390-virtio-bus.c
@@ -300,7 +300,7 @@ static int s390_virtio_rng_init(VirtIOS390Device *s390_dev)
     }
 
     object_property_set_link(OBJECT(dev),
-                             OBJECT(dev->vdev.conf.default_backend), "rng",
+                             OBJECT(dev->vdev.conf.rng), "rng",
                              NULL);
 
     return s390_virtio_device_init(s390_dev, VIRTIO_DEVICE(vdev));
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index 44f5772..201a635 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -744,7 +744,7 @@ static int virtio_ccw_rng_init(VirtioCcwDevice *ccw_dev)
     }
 
     object_property_set_link(OBJECT(dev),
-                             OBJECT(dev->vdev.conf.default_backend), "rng",
+                             OBJECT(dev->vdev.conf.rng), "rng",
                              NULL);
 
     return virtio_ccw_device_init(ccw_dev, VIRTIO_DEVICE(vdev));
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 444b71a..b070b64 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1455,7 +1455,7 @@ static int virtio_rng_pci_init(VirtIOPCIProxy *vpci_dev)
     }
 
     object_property_set_link(OBJECT(vrng),
-                             OBJECT(vrng->vdev.conf.default_backend), "rng",
+                             OBJECT(vrng->vdev.conf.rng), "rng",
                              NULL);
 
     return 0;
commit 1288844e7c4ede59509bf697a4cca0f2215a1ce3
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri May 31 13:56:24 2013 +0200

    iscsi: reorganize iscsi_readcapacity_sync
    
    Avoid the goto, and use the same retry logic for the 10- and 16-
    byte versions.
    
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/block/iscsi.c b/block/iscsi.c
index 6171b01..0bbf0b1 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -931,60 +931,58 @@ static int iscsi_readcapacity_sync(IscsiLun *iscsilun)
     int ret = 0;
     int retries = ISCSI_CMD_RETRIES; 
 
-try_again:
-    switch (iscsilun->type) {
-    case TYPE_DISK:
-        task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
-        if (task == NULL || task->status != SCSI_STATUS_GOOD) {
-            if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
-                    && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
-                    && retries-- > 0) {
-                scsi_free_scsi_task(task);
-                goto try_again;
-            }
-            error_report("iSCSI: failed to send readcapacity16 command.");
-            ret = -EINVAL;
-            goto out;
-        }
-        rc16 = scsi_datain_unmarshall(task);
-        if (rc16 == NULL) {
-            error_report("iSCSI: Failed to unmarshall readcapacity16 data.");
-            ret = -EINVAL;
-            goto out;
-        }
-        iscsilun->block_size = rc16->block_length;
-        iscsilun->num_blocks = rc16->returned_lba + 1;
-        break;
-    case TYPE_ROM:
-        task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
-        if (task == NULL || task->status != SCSI_STATUS_GOOD) {
-            error_report("iSCSI: failed to send readcapacity10 command.");
-            ret = -EINVAL;
-            goto out;
-        }
-        rc10 = scsi_datain_unmarshall(task);
-        if (rc10 == NULL) {
-            error_report("iSCSI: Failed to unmarshall readcapacity10 data.");
-            ret = -EINVAL;
-            goto out;
+    do {
+        if (task != NULL) {
+            scsi_free_scsi_task(task);
+            task = NULL;
         }
-        iscsilun->block_size = rc10->block_size;
-        if (rc10->lba == 0) {
-            /* blank disk loaded */
-            iscsilun->num_blocks = 0;
-        } else {
-            iscsilun->num_blocks = rc10->lba + 1;
+
+        switch (iscsilun->type) {
+        case TYPE_DISK:
+            task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
+            if (task != NULL && task->status == SCSI_STATUS_GOOD) {
+                rc16 = scsi_datain_unmarshall(task);
+                if (rc16 == NULL) {
+                    error_report("iSCSI: Failed to unmarshall readcapacity16 data.");
+                    ret = -EINVAL;
+                } else {
+                    iscsilun->block_size = rc16->block_length;
+                    iscsilun->num_blocks = rc16->returned_lba + 1;
+                }
+            }
+            break;
+        case TYPE_ROM:
+            task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
+            if (task != NULL && task->status == SCSI_STATUS_GOOD) {
+                rc10 = scsi_datain_unmarshall(task);
+                if (rc10 == NULL) {
+                    error_report("iSCSI: Failed to unmarshall readcapacity10 data.");
+                    ret = -EINVAL;
+                } else {
+                    iscsilun->block_size = rc10->block_size;
+                    if (rc10->lba == 0) {
+                        /* blank disk loaded */
+                        iscsilun->num_blocks = 0;
+                    } else {
+                        iscsilun->num_blocks = rc10->lba + 1;
+                    }
+                }
+            }
+            break;
+        default:
+            return 0;
         }
-        break;
-    default:
-        break;
-    }
+    } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
+             && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
+             && retries-- > 0);
 
-out:
+    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
+        error_report("iSCSI: failed to send readcapacity10 command.");
+        ret = -EINVAL;
+    }
     if (task) {
         scsi_free_scsi_task(task);
     }
-
     return ret;
 }
 
commit f0d2a4d4d63dd2f0f3ecb2d591b979b0e7f24a22
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri May 31 13:47:08 2013 +0200

    iscsi: simplify freeing of tasks
    
    Always free them in the iscsi_aio_*_acb functions and remove the
    checks in their callers.  Remove ifs when the task struct was
    previously dereferenced (spotted by Coverity).
    
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/block/iscsi.c b/block/iscsi.c
index f7199c1..6171b01 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -218,10 +218,8 @@ iscsi_aio_write16_cb(struct iscsi_context *iscsi, int status,
         if (status == SCSI_STATUS_CHECK_CONDITION
             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
             && acb->retries-- > 0) {
-            if (acb->task != NULL) {
-                scsi_free_scsi_task(acb->task);
-                acb->task = NULL;
-            }
+            scsi_free_scsi_task(acb->task);
+            acb->task = NULL;
             if (iscsi_aio_writev_acb(acb) == 0) {
                 iscsi_set_events(acb->iscsilun);
                 return;
@@ -303,6 +301,7 @@ iscsi_aio_writev_acb(IscsiAIOCB *acb)
                                    acb);
 #endif
     if (ret != 0) {
+        scsi_free_scsi_task(acb->task);
         g_free(acb->buf);
         return -1;
     }
@@ -333,9 +332,6 @@ iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
     acb->retries     = ISCSI_CMD_RETRIES;
 
     if (iscsi_aio_writev_acb(acb) != 0) {
-        if (acb->task) {
-            scsi_free_scsi_task(acb->task);
-        }
         qemu_aio_release(acb);
         return NULL;
     }
@@ -364,10 +360,8 @@ iscsi_aio_read16_cb(struct iscsi_context *iscsi, int status,
         if (status == SCSI_STATUS_CHECK_CONDITION
             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
             && acb->retries-- > 0) {
-            if (acb->task != NULL) {
-                scsi_free_scsi_task(acb->task);
-                acb->task = NULL;
-            }
+            scsi_free_scsi_task(acb->task);
+            acb->task = NULL;
             if (iscsi_aio_readv_acb(acb) == 0) {
                 iscsi_set_events(acb->iscsilun);
                 return;
@@ -445,6 +439,7 @@ iscsi_aio_readv_acb(IscsiAIOCB *acb)
                                    NULL,
                                    acb);
     if (ret != 0) {
+        scsi_free_scsi_task(acb->task);
         return -1;
     }
 
@@ -480,9 +475,6 @@ iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
     acb->retries     = ISCSI_CMD_RETRIES;
 
     if (iscsi_aio_readv_acb(acb) != 0) {
-        if (acb->task) {
-            scsi_free_scsi_task(acb->task);
-        }
         qemu_aio_release(acb);
         return NULL;
     }
@@ -509,10 +501,8 @@ iscsi_synccache10_cb(struct iscsi_context *iscsi, int status,
         if (status == SCSI_STATUS_CHECK_CONDITION
             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
             && acb->retries-- > 0) {
-            if (acb->task != NULL) {
-                scsi_free_scsi_task(acb->task);
-                acb->task = NULL;
-            }
+            scsi_free_scsi_task(acb->task);
+            acb->task = NULL;
             if (iscsi_aio_flush_acb(acb) == 0) {
                 iscsi_set_events(acb->iscsilun);
                 return;
@@ -589,10 +579,8 @@ iscsi_unmap_cb(struct iscsi_context *iscsi, int status,
         if (status == SCSI_STATUS_CHECK_CONDITION
             && acb->task->sense.key == SCSI_SENSE_UNIT_ATTENTION
             && acb->retries-- > 0) {
-            if (acb->task != NULL) {
-                scsi_free_scsi_task(acb->task);
-                acb->task = NULL;
-            }
+            scsi_free_scsi_task(acb->task);
+            acb->task = NULL;
             if (iscsi_aio_discard_acb(acb) == 0) {
                 iscsi_set_events(acb->iscsilun);
                 return;
@@ -647,9 +635,6 @@ iscsi_aio_discard(BlockDriverState *bs,
     acb->retries     = ISCSI_CMD_RETRIES;
 
     if (iscsi_aio_discard_acb(acb) != 0) {
-        if (acb->task) {
-            scsi_free_scsi_task(acb->task);
-        }
         qemu_aio_release(acb);
         return NULL;
     }
commit 0e22a2d18998fd183c8181663981eb681ca977e9
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Thu May 30 16:14:44 2013 +0200

    vhost-scsi: fix k->set_guest_notifiers() NULL dereference
    
    Coverity picked up a copy-paste bug.  In vhost_scsi_start() we check for
    !k->set_guest_notifiers and error out.  The check probably got copied
    but instead of erroring we actually use the function pointer!
    
    Cc: Nicholas Bellinger <nab at linux-iscsi.org>
    Cc: Asias He <asias at redhat.com>
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index d7a1c33..785e93f 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -123,7 +123,7 @@ static void vhost_scsi_stop(VHostSCSI *s)
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
     int ret = 0;
 
-    if (!k->set_guest_notifiers) {
+    if (k->set_guest_notifiers) {
         ret = k->set_guest_notifiers(qbus->parent, s->dev.nvqs, false);
         if (ret < 0) {
                 error_report("vhost guest notifier cleanup failed: %d\n", ret);
commit 18e673b8f350e1d789b8a50d209eb4a4252ebbda
Author: Pavel Hrdina <phrdina at redhat.com>
Date:   Wed May 29 15:47:23 2013 +0200

    scsi-disk: scsi-block device for scsi pass-through should not be removable
    
    This patch adds a new SCSI_DISK_F_NO_REMOVABLE_DEVOPS feature. By this
    feature we can set that the scsi-block (scsi pass-through) device will still
    be removable from the guest side, but from monitor it cannot be removed.
    
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Pavel Hrdina <phrdina at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 02733dc..74e6a14 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -61,8 +61,9 @@ typedef struct SCSIDiskReq {
     BlockAcctCookie acct;
 } SCSIDiskReq;
 
-#define SCSI_DISK_F_REMOVABLE   0
-#define SCSI_DISK_F_DPOFUA      1
+#define SCSI_DISK_F_REMOVABLE             0
+#define SCSI_DISK_F_DPOFUA                1
+#define SCSI_DISK_F_NO_REMOVABLE_DEVOPS   2
 
 struct SCSIDiskState
 {
@@ -2110,7 +2111,8 @@ static int scsi_initfn(SCSIDevice *dev)
         return -1;
     }
 
-    if (s->features & (1 << SCSI_DISK_F_REMOVABLE)) {
+    if ((s->features & (1 << SCSI_DISK_F_REMOVABLE)) &&
+            !(s->features & (1 << SCSI_DISK_F_NO_REMOVABLE_DEVOPS))) {
         bdrv_set_dev_ops(s->qdev.conf.bs, &scsi_disk_removable_block_ops, s);
     } else {
         bdrv_set_dev_ops(s->qdev.conf.bs, &scsi_disk_block_ops, s);
@@ -2322,6 +2324,12 @@ static int scsi_block_initfn(SCSIDevice *dev)
     } else {
         s->qdev.blocksize = 512;
     }
+
+    /* Makes the scsi-block device not removable by using HMP and QMP eject
+     * command.
+     */
+    s->features |= (1 << SCSI_DISK_F_NO_REMOVABLE_DEVOPS);
+
     return scsi_initfn(&s->qdev);
 }
 
commit d836f8d35dc418e24c3b11e2ea67d361b867b650
Author: Pavel Hrdina <phrdina at redhat.com>
Date:   Wed May 29 14:12:10 2013 +0200

    scsi-generic: check the return value of bdrv_aio_ioctl in execute_command
    
    This fixes the bug introduced by this commit ad54ae80c73f.
    The bdrv_aio_ioctl() still could return null and we should return an error
    in that case.
    
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Pavel Hrdina <phrdina at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index 19bd36c..8f195be 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -174,6 +174,9 @@ static int execute_command(BlockDriverState *bdrv,
     r->io_header.flags |= SG_FLAG_DIRECT_IO;
 
     r->req.aiocb = bdrv_aio_ioctl(bdrv, SG_IO, &r->io_header, complete, r);
+    if (r->req.aiocb == NULL) {
+        return -EIO;
+    }
 
     return 0;
 }
commit 53254e569f8e07501f3e0098bd57d2b780e52faa
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue May 21 14:08:53 2013 +0200

    scsi-generic: fix sign extension of READ CAPACITY(10) data
    
    Issuing the READ CAPACITY(10) command in the guest will cause QEMU
    to update its knowledge of the maximum accessible LBA in the disk.
    The recorded maximum LBA will be wrong if the disk is bigger than
    1TB, because ldl_be_p returns a signed int.
    
    When this is fixed, a latent bug will be unmasked.  If the READ
    CAPACITY(10) command reported an overflow (0xFFFFFFFF), we must
    not overwrite the previously-known maximum accessible LBA, or the guest
    will fail to access the disk above the first 2TB.
    
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
index 2a9a561..19bd36c 100644
--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
@@ -198,9 +198,10 @@ static void scsi_read_complete(void * opaque, int ret)
         scsi_command_complete(r, 0);
     } else {
         /* Snoop READ CAPACITY output to set the blocksize.  */
-        if (r->req.cmd.buf[0] == READ_CAPACITY_10) {
+        if (r->req.cmd.buf[0] == READ_CAPACITY_10 &&
+            (ldl_be_p(&r->buf[0]) != 0xffffffffU || s->max_lba == 0)) {
             s->blocksize = ldl_be_p(&r->buf[4]);
-            s->max_lba = ldl_be_p(&r->buf[0]);
+            s->max_lba = ldl_be_p(&r->buf[0]) & 0xffffffffULL;
         } else if (r->req.cmd.buf[0] == SERVICE_ACTION_IN_16 &&
                    (r->req.cmd.buf[1] & 31) == SAI_READ_CAPACITY_16) {
             s->blocksize = ldl_be_p(&r->buf[8]);
commit 7721c7f7c272fd2ed9d58500b7e99c33ab8a1af1
Author: Pavel Hrdina <phrdina at redhat.com>
Date:   Mon May 20 13:06:47 2013 +0200

    scsi: reset cdrom tray statuses on scsi_disk_reset
    
    Tray statuses should be also reset. Some guests may lock the tray and
    right after resetting the guest it should be unlocked and closed. This
    is done on power-on, reset and resume from suspend/hibernate on bare-metal.
    
    This fix is already committed for IDE CD.
    Check the commit a7f3d65b65b8c86a5ff0c0abcfefb45e2ec6fe4c.
    
    Test results on bare-metal:
      - on reset/power-on the CD-ROM tray is closed even before the monitor
        is turned on
      - on resume from suspend/hibernate the tray is also closed before
        the monitor is turned on
    
    From test results it seems that this behavior is OS and probably BIOS
    independent.
    
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Pavel Hrdina <phrdina at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index c8d2a99..02733dc 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -1984,6 +1984,9 @@ static void scsi_disk_reset(DeviceState *dev)
         nb_sectors--;
     }
     s->qdev.max_lba = nb_sectors;
+    /* reset tray statuses */
+    s->tray_locked = 0;
+    s->tray_open = 0;
 }
 
 static void scsi_destroy(SCSIDevice *dev)
commit 23307908790cd8fad91220863d7712c571ddc977
Author: Ján Tomko <jtomko at redhat.com>
Date:   Mon Jun 3 17:54:56 2013 +0200

    nbd: strip braces from literal IPv6 address in URI
    
    Otherwise they would get passed to getaddrinfo and fail with:
    address resolution failed for [::1]:1234: Name or service not known
    
    (Broken by commit v1.4.0-736-gf17c90b)
    
    Signed-off-by: Ján Tomko <jtomko at redhat.com>
    Cc: qemu-stable at nongnu.org
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/block/nbd.c b/block/nbd.c
index 30e3b78..9c480b8 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -118,13 +118,22 @@ static int nbd_parse_uri(const char *filename, QDict *options)
         }
         qdict_put(options, "path", qstring_from_str(qp->p[0].value));
     } else {
+        QString *host;
         /* nbd[+tcp]://host[:port]/export */
         if (!uri->server) {
             ret = -EINVAL;
             goto out;
         }
 
-        qdict_put(options, "host", qstring_from_str(uri->server));
+        /* strip braces from literal IPv6 address */
+        if (uri->server[0] == '[') {
+            host = qstring_from_substr(uri->server, 1,
+                                       strlen(uri->server) - 2);
+        } else {
+            host = qstring_from_str(uri->server);
+        }
+
+        qdict_put(options, "host", host);
         if (uri->port) {
             char* port_str = g_strdup_printf("%d", uri->port);
             qdict_put(options, "port", qstring_from_str(port_str));
commit 391b7b9701ba3318e890ec0cba97a3c654bfa667
Author: Ján Tomko <jtomko at redhat.com>
Date:   Mon Jun 3 17:54:55 2013 +0200

    qemu-socket: allow hostnames starting with a digit
    
    According to RFC 1123 [1], hostnames can start with a digit too.
    
    [1] http://tools.ietf.org/html/rfc1123#page-13
    
    Signed-off-by: Ján Tomko <jtomko at redhat.com>
    Cc: qemu-stable at nongnu.org
    [Use strspn, not strcspn. - Paolo]
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c
index fdd8dc4..96eca2a 100644
--- a/util/qemu-sockets.c
+++ b/util/qemu-sockets.c
@@ -24,7 +24,6 @@
 
 #include "monitor/monitor.h"
 #include "qemu/sockets.h"
-#include "qemu-common.h" /* for qemu_isdigit */
 #include "qemu/main-loop.h"
 
 #ifndef AI_ADDRCONFIG
@@ -511,19 +510,15 @@ InetSocketAddress *inet_parse(const char *str, Error **errp)
             goto fail;
         }
         addr->ipv6 = addr->has_ipv6 = true;
-    } else if (qemu_isdigit(str[0])) {
-        /* IPv4 addr */
-        if (2 != sscanf(str, "%64[0-9.]:%32[^,]%n", host, port, &pos)) {
-            error_setg(errp, "error parsing IPv4 address '%s'", str);
-            goto fail;
-        }
-        addr->ipv4 = addr->has_ipv4 = true;
     } else {
-        /* hostname */
+        /* hostname or IPv4 addr */
         if (2 != sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos)) {
             error_setg(errp, "error parsing address '%s'", str);
             goto fail;
         }
+        if (host[strspn(host, "0123456789.")] == '\0') {
+            addr->ipv4 = addr->has_ipv4 = true;
+        }
     }
 
     addr->host = g_strdup(host);
commit afd59989db90683fa127fec501d2633bcfbd6379
Author: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
Date:   Tue Jun 11 11:00:40 2013 +1000

    intc/xilinx_intc: Dont lower IRQ when HIE cleared
    
    This is a little strange. It is lowering the parent IRQ pin on input
    when HIE is cleared. There is no such behaviour in the real hardware.
    
    ISR changes based on interrupt pin state are already guarded on HIE
    being set. So we can just delete this if in its entirety.
    
    Signed-off-by: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
    Signed-off-by: Edgar E. Iglesias <edgar.iglesias at gmail.com>

diff --git a/hw/intc/xilinx_intc.c b/hw/intc/xilinx_intc.c
index ddedfa3..297f537 100644
--- a/hw/intc/xilinx_intc.c
+++ b/hw/intc/xilinx_intc.c
@@ -143,11 +143,6 @@ static void irq_handler(void *opaque, int irq, int level)
 {
     struct xlx_pic *p = opaque;
 
-    if (!(p->regs[R_MER] & 2)) {
-        qemu_irq_lower(p->parent_irq);
-        return;
-    }
-
     /* edge triggered interrupt */
     if (p->c_kind_of_intr & (1 << irq) && p->regs[R_MER] & 2) {
         p->regs[R_ISR] |= (level << irq);
commit fa96d6142f7f1947717c7c45c4d3141e5ab55167
Author: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
Date:   Tue Jun 11 10:59:55 2013 +1000

    intc/xilinx_intc: Inhibit write to ISR when HIE
    
    When the Hardware Interrupt Enable (HIE) bit is set, software cannot
    change ISR. Add write guard accordingly.
    
    Signed-off-by: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
    Signed-off-by: Edgar E. Iglesias <edgar.iglesias at gmail.com>

diff --git a/hw/intc/xilinx_intc.c b/hw/intc/xilinx_intc.c
index 09b4453..ddedfa3 100644
--- a/hw/intc/xilinx_intc.c
+++ b/hw/intc/xilinx_intc.c
@@ -116,6 +116,11 @@ pic_write(void *opaque, hwaddr addr,
         case R_CIE:
             p->regs[R_IER] &= ~value; /* Atomic clear ie.  */
             break;
+        case R_ISR:
+            if ((p->regs[R_MER] & 2)) {
+                break;
+            }
+            /* fallthrough */
         default:
             if (addr < ARRAY_SIZE(p->regs))
                 p->regs[addr] = value;
commit 45fdd3bf5a00466cb0f762c638291a5446773dc9
Author: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
Date:   Tue Jun 11 10:59:09 2013 +1000

    intc/xilinx_intc: Handle level interrupt retriggering
    
    Acking a level sensitive interrupt should have no effect if the
    interrupt pin is still asserted. The current implementation requires
    and edge condition to occur for setting a level sensitive IRQ, which
    means an ACK can clear a level sensitive interrupt, until the original
    source strobes the interrupt again.
    
    Fix by keeping track of the interrupt pin state and setting ISR based
    on this every time update_irq() is called.
    
    Signed-off-by: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
    Signed-off-by: Edgar E. Iglesias <edgar.iglesias at gmail.com>

diff --git a/hw/intc/xilinx_intc.c b/hw/intc/xilinx_intc.c
index d243a00..09b4453 100644
--- a/hw/intc/xilinx_intc.c
+++ b/hw/intc/xilinx_intc.c
@@ -49,11 +49,19 @@ struct xlx_pic
 
     /* Runtime control registers.  */
     uint32_t regs[R_MAX];
+    /* state of the interrupt input pins */
+    uint32_t irq_pin_state;
 };
 
 static void update_irq(struct xlx_pic *p)
 {
     uint32_t i;
+
+    /* level triggered interrupt */
+    if (p->regs[R_MER] & 2) {
+        p->regs[R_ISR] |= p->irq_pin_state & ~p->c_kind_of_intr;
+    }
+
     /* Update the pending register.  */
     p->regs[R_IPR] = p->regs[R_ISR] & p->regs[R_IER];
 
@@ -135,7 +143,13 @@ static void irq_handler(void *opaque, int irq, int level)
         return;
     }
 
-    p->regs[R_ISR] |= (level << irq);
+    /* edge triggered interrupt */
+    if (p->c_kind_of_intr & (1 << irq) && p->regs[R_MER] & 2) {
+        p->regs[R_ISR] |= (level << irq);
+    }
+
+    p->irq_pin_state &= ~(1 << irq);
+    p->irq_pin_state |= level << irq;
     update_irq(p);
 }
 
commit 6327c221fff955ee979559ec85c148963e06d78f
Author: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
Date:   Tue Jun 11 10:58:25 2013 +1000

    intc/xilinx_intc: Don't clear level sens. IRQs without ACK
    
    For level sensitive interrupts, ISR bits are cleared when the input pin
    is lowered. This is incorrect. Only software can clear ISR bits (via
    IAR or direct write to ISR with !MER(2)).
    
    Signed-off-by: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
    Signed-off-by: Edgar E. Iglesias <edgar.iglesias at gmail.com>

diff --git a/hw/intc/xilinx_intc.c b/hw/intc/xilinx_intc.c
index 5df7008..d243a00 100644
--- a/hw/intc/xilinx_intc.c
+++ b/hw/intc/xilinx_intc.c
@@ -135,13 +135,7 @@ static void irq_handler(void *opaque, int irq, int level)
         return;
     }
 
-    /* Update source flops. Don't clear unless level triggered.
-       Edge triggered interrupts only go away when explicitely acked to
-       the interrupt controller.  */
-    if (!(p->c_kind_of_intr & (1 << irq)) || level) {
-        p->regs[R_ISR] &= ~(1 << irq);
-        p->regs[R_ISR] |= (level << irq);
-    }
+    p->regs[R_ISR] |= (level << irq);
     update_irq(p);
 }
 
commit 37a011e9bade7bcbdd41addffc7c94cbf628404c
Author: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
Date:   Tue Jun 11 10:57:41 2013 +1000

    microblaze/petalogix_s3adsp1800_mmu: Fix UART IRQ
    
    The UART IRQ is edge sensitive, whereas the machine was registering it
    as level sensitive. Fix.
    
    Signed-off-by: Peter Crosthwaite <peter.crosthwaite at xilinx.com>
    Signed-off-by: Edgar E. Iglesias <edgar.iglesias at gmail.com>

diff --git a/hw/microblaze/petalogix_s3adsp1800_mmu.c b/hw/microblaze/petalogix_s3adsp1800_mmu.c
index 7c258f0..b3bcd4e 100644
--- a/hw/microblaze/petalogix_s3adsp1800_mmu.c
+++ b/hw/microblaze/petalogix_s3adsp1800_mmu.c
@@ -97,7 +97,7 @@ petalogix_s3adsp1800_init(QEMUMachineInitArgs *args)
                           1, 0x89, 0x18, 0x0000, 0x0, 1);
 
     cpu_irq = microblaze_pic_init_cpu(env);
-    dev = xilinx_intc_create(INTC_BASEADDR, cpu_irq[0], 2);
+    dev = xilinx_intc_create(INTC_BASEADDR, cpu_irq[0], 0xA);
     for (i = 0; i < 32; i++) {
         irq[i] = qdev_get_gpio_in(dev, i);
     }


More information about the Spice-commits mailing list