Tue Oct 30 04:25:17 PDT 2012

Makefile                            |   16 
 Makefile.objs                       |    9 
 QMP/qmp-events.txt                  |   18 +
 QMP/qmp.py                          |   21 +
 block.c                             |  326 +++++++++++++------
 block.h                             |    9 
 block/Makefile.objs                 |    1 
 block/commit.c                      |   11 
 block/mirror.c                      |  322 ++++++++++++++++++
 block/stream.c                      |    4 
 block_int.h                         |   26 +
 blockdev-nbd.c                      |  119 ++++++
 blockdev.c                          |  182 +++++++++-
 blockjob.c                          |   36 ++
 blockjob.h                          |   41 ++
 configure                           |   26 +
 console.h                           |    2 
 default-configs/sparc64-softmmu.mak |    1 
 error.c                             |   28 +
 error.h                             |    9 
 hmp-commands.hx                     |   38 ++
 hmp.c                               |   50 ++
 hmp.h                               |    2 
 hw/ac97.c                           |  109 +++++-
 hw/es1370.c                         |   46 ++
 hw/i8254.c                          |   20 -
 hw/kvm/pci-assign.c                 |    8 
 hw/m48t59.c                         |   24 -
 hw/mc146818rtc.c                    |   19 -
 hw/msi.c                            |   45 +-
 hw/msi.h                            |    1 
 hw/nseries.c                        |    3 
 hw/pc.c                             |   19 -
 hw/pc_piix.c                        |   11 
 hw/pci.c                            |   42 ++
 hw/pci.h                            |    3 
 hw/pci_bridge.c                     |   50 +-
 hw/pci_ids.h                        |    2 
 hw/pci_internals.h                  |   24 -
 hw/pcie_host.c                      |   35 +-
 hw/pcie_host.h                      |   11 
 hw/pckbd.c                          |   48 +-
 hw/ppc/Makefile.objs                |    1 
 hw/ppc/e500.c                       |    5 
 hw/ppc440_bamboo.c                  |    2 
 hw/ppc_newworld.c                   |   19 -
 hw/ppc_oldworld.c                   |    2 
 hw/ppc_prep.c                       |    2 
 hw/ppce500_pci.c                    |    9 
 hw/pxa2xx.c                         |    4 
 hw/realview.c                       |    2 
 hw/rtl8139.c                        |   78 ++--
 hw/s390-virtio.c                    |    6 
 hw/s390x/Makefile.objs              |    3 
 hw/s390x/event-facility.c           |  398 +++++++++++++++++++++++
 hw/s390x/event-facility.h           |   96 +++++
 hw/s390x/sclp.c                     |  163 +++++++++
 hw/s390x/sclp.h                     |  118 ++++++
 hw/s390x/sclpconsole.c              |  306 +++++++++++++++++
 hw/s390x/sclpquiesce.c              |  123 +++++++
 hw/serial.c                         |   30 -
 hw/spapr.c                          |   16 
 hw/spapr.h                          |    8 
 hw/spapr_events.c                   |  321 ++++++++++++++++++
 hw/spapr_hcall.c                    |   29 -
 hw/spapr_pci.c                      |   44 --
 hw/spapr_pci.h                      |    2 
 hw/spapr_rtas.c                     |    9 
 hw/usb.h                            |   28 +
 hw/usb/bus.c                        |    8 
 hw/usb/core.c                       |   28 +
 hw/usb/hcd-ehci.c                   |  105 +++---
 hw/usb/hcd-musb.c                   |    3 
 hw/usb/hcd-ohci.c                   |    9 
 hw/usb/hcd-uhci.c                   |  380 ++++++++++------------
 hw/usb/hcd-xhci.c                   |  169 ++++++---
 hw/usb/host-linux.c                 |    3 
 hw/usb/redirect.c                   |   18 -
 hw/versatilepb.c                    |    2 
 hw/vhost_net.c                      |   13 
 hw/virtio-net.c                     |  176 +++++-----
 hw/virtio-pci.c                     |  126 ++-----
 hw/vmport.c                         |   21 -
 hw/xen_platform.c                   |   48 ++
 hw/xtensa_lx60.c                    |   30 -
 hw/xtensa_sim.c                     |   27 -
 iov.c                               |   23 +
 iov.h                               |    9 
 memory.c                            |   12 
 migration-exec.c                    |   26 -
 migration-fd.c                      |   27 -
 migration-tcp.c                     |   19 -
 migration-unix.c                    |   95 -----
 migration.c                         |   36 --
 migration.h                         |   19 -
 monitor.c                           |  143 +++++---
 monitor.h                           |    4 
 nbd.c                               |   39 +-
 osdep.c                             |   12 
 osdep.h                             |    2 
 qapi-schema.json                    |  225 ++++++++++++-
 qemu-char.c                         |   24 -
 qemu-config.c                       |   26 +
 qemu-img-cmds.hx                    |    4 
 qemu-img.c                          |  219 ++++++++++--
 qemu-img.texi                       |   25 +
 qemu-options.hx                     |   36 ++
 qemu-sockets.c                      |  424 ++++++++++++++++--------
 qemu-timer.c                        |    4 
 qemu-tool.c                         |    6 
 qemu_socket.h                       |   19 -
 qerror.h                            |    9 
 qga/channel-posix.c                 |    8 
 qmp-commands.hx                     |   69 ++++
 qmp.c                               |   23 -
 scripts/kvm/kvm_stat                |   11 
 sysemu.h                            |    4 
 target-alpha/helper.h               |  176 +++++-----
 target-arm/arm-semi.c               |  167 ++++++---
 target-arm/helper.c                 |    5 
 target-arm/helper.h                 |   18 -
 target-arm/neon_helper.c            |    6 
 target-arm/op_helper.c              |    2 
 target-arm/translate.c              |   15 
 target-cris/helper.h                |   18 -
 target-i386/cpu.c                   |   34 +
 target-i386/cpu.h                   |    2 
 target-i386/helper.h                |    4 
 target-microblaze/helper.h          |    6 
 target-mips/helper.h                |  106 +++---
 target-mips/translate.c             |   96 +++--
 target-ppc/cpu.h                    |    7 
 target-ppc/helper.h                 |   38 +-
 target-ppc/mmu_helper.c             |    2 
 target-ppc/translate_init.c         |    9 
 target-s390x/cpu.h                  |   13 
 target-s390x/helper.h               |   76 ++--
 target-s390x/kvm.c                  |  117 +++++-
 target-s390x/misc_helper.c          |   45 --
 target-sh4/helper.h                 |    6 
 target-sparc/helper.h               |   50 +-
 target-xtensa/helper.h              |   16 
 tcg/README                          |   22 -
 tcg/i386/tcg-target.c               |  218 +++++++-----
 tcg/mips/tcg-target.c               |    7 
 tcg/optimize.c                      |    3 
 tcg/tcg-op.h                        |   18 -
 tcg/tcg-opc.h                       |   29 -
 tcg/tcg.c                           |  449 ++++++++++++++++----------
 tcg/tcg.h                           |   29 +
 tests/Makefile                      |   10 
 tests/qemu-iotests/040              |  106 ++++++
 tests/qemu-iotests/040.out          |    4 
 tests/qemu-iotests/041              |  615 ++++++++++++++++++++++++++++++++++++
 tests/qemu-iotests/041.out          |    5 
 tests/qemu-iotests/042              |   78 ++++
 tests/qemu-iotests/042.out          |   15 
 tests/qemu-iotests/043              |   95 +++++
 tests/qemu-iotests/043.out          |   66 +++
 tests/qemu-iotests/common.rc        |   10 
 tests/qemu-iotests/group            |    3 
 tests/qemu-iotests/iotests.py       |    4 
 tests/tcg/hello-i386.c              |    3 
 tests/tcg/test-i386.c               |    2 
 trace-events                        |   10 
 ui/vnc.c                            |   93 ++---
 vl.c                                |  151 +++++++-
 167 files changed, 7126 insertions(+), 2362 deletions(-)

New commits:
commit 38c4718392fda2bda1e084366b9aa41b49b9d8cf
Merge: 3585317... 130c57c...
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 30 00:35:43 2012 +0100

    Merge branch 's390-for-upstream' of git://repo.or.cz/qemu/agraf
    
    * 's390-for-upstream' of git://repo.or.cz/qemu/agraf:
      s390: sclp ascii console support
      s390: sclp signal quiesce support
      s390: sclp event support
      s390: sclp base support
      s390: use sync regs for register transfer
      s390/kvm_stat: correct sys_perf_event_open syscall number
      s390x: fix -initrd in virtio machine

commit 3585317f6fdff0e3ae082f88afb2784d815a07e2
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Oct 19 23:48:13 2012 +0200

    tcg/mips: use MUL instead of MULT on MIPS32 and above
    
    MIPS32 and later instruction sets have a multiplication instruction
    directly operating on GPRs. It only produces a 32-bit result but
    it is exactly what is needed by QEMU.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 7e4013e..ae2b274 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -323,6 +323,9 @@ enum {
     OPC_BLTZ     = OPC_REGIMM | (0x00 << 16),
     OPC_BGEZ     = OPC_REGIMM | (0x01 << 16),
 
+    OPC_SPECIAL2 = 0x1c << 26,
+    OPC_MUL      = OPC_SPECIAL2 | 0x002,
+
     OPC_SPECIAL3 = 0x1f << 26,
     OPC_INS      = OPC_SPECIAL3 | 0x004,
     OPC_WSBH     = OPC_SPECIAL3 | 0x0a0,
@@ -1403,8 +1406,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tcg_out_mov(s, TCG_TYPE_I32, args[0], TCG_REG_AT);
         break;
     case INDEX_op_mul_i32:
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 1)
+        tcg_out_opc_reg(s, OPC_MUL, args[0], args[1], args[2]);
+#else
         tcg_out_opc_reg(s, OPC_MULT, 0, args[1], args[2]);
         tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0);
+#endif
         break;
     case INDEX_op_mulu2_i32:
         tcg_out_opc_reg(s, OPC_MULTU, 0, args[2], args[3]);
commit d26a8caea3f160782841efb87b5e8bea606b512b
Author: Avi Kivity <avi at redhat.com>
Date:   Mon Oct 29 18:22:36 2012 +0200

    memory: fix rendering of a region obscured by another
    
    The memory core drops regions that are hidden by another region (for example,
    during BAR sizing), but it doesn't do so correctly if the lower address of the
    existing range is below the lower address of the new range.
    
    Example (qemu-system-mips -M malta -kernel vmlinux-2.6.32-5-4kc-malta
             -append "console=ttyS0"  -nographic -vga cirrus):
    
    Existing range: 10000000-107fffff
    New range:      100a0000-100bffff
    
    Correct behaviour: drop new range
    Incorrect behaviour: add new range
    
    Fix by taking this case into account (previously we only considered
    equal lower boundaries).
    
    Tested-by: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Avi Kivity <avi at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/memory.c b/memory.c
index 36bb9a5..243cb23 100644
--- a/memory.c
+++ b/memory.c
@@ -539,12 +539,12 @@ static void render_memory_region(FlatView *view,
             offset_in_region += int128_get64(now);
             int128_subfrom(&remain, now);
         }
-        if (int128_eq(base, view->ranges[i].addr.start)) {
-            now = int128_min(remain, view->ranges[i].addr.size);
-            int128_addto(&base, now);
-            offset_in_region += int128_get64(now);
-            int128_subfrom(&remain, now);
-        }
+        now = int128_sub(int128_min(int128_add(base, remain),
+                                    addrrange_end(view->ranges[i].addr)),
+                         base);
+        int128_addto(&base, now);
+        offset_in_region += int128_get64(now);
+        int128_subfrom(&remain, now);
     }
     if (int128_nz(remain)) {
         fr.mr = mr;
commit 233926fafa6c4a0fb666e1469524d66dd3b47ddd
Merge: b308c82... 523a59f...
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Mon Oct 29 14:31:47 2012 -0500

    Merge remote-tracking branch 'mst/tags/for_anthony' into staging
    
    virtio,pci infrastructure
    
    This includes infrastructure patches that don't do much by themselves
    but should help vfio and q35 make progress.
    Also included is rework of virtio-net to use iovec APIs
    for vector access - helpful to make it more secure
    and in preparation for a new feature that will allow
    arbitrary s/g layout for guests.
    Also included is a pci bridge bugfix by Avi.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>
    
    * mst/tags/for_anthony: (25 commits)
      pci: avoid destroying bridge address space windows in a transaction
      virtio-net: enable mrg buf header in tap on linux
      virtio-net: test peer header support at init time
      virtio-net: minor code simplification
      virtio-net: simplify rx code
      virtio-net: switch tx to safe iov functions
      virtio-net: first s/g is always at start of buf
      virtio-net: refactor receive_hdr
      virtio-net: use safe iov operations for rx
      virtio-net: avoid sg copy
      iov: add iov_cpy
      virtio-net: track host/guest header length
      pcie: Convert PCIExpressHost to use the QOM.
      pcie: pass pcie window size to pcie_host_mmcfg_update()
      pci: Add class 0xc05 as 'SMBus'
      pci: introduce pci_swizzle_map_irq_fn() for standardized interrupt pin swizzle
      pci_ids: add intel 82801BA pci-to-pci bridge id
      pci: pci capability must be in PCI space
      pci: make each capability DWORD aligned
      qemu: enable PV EOI for qemu 1.3
      ...
    
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

commit 130c57c036fdc2eba8936da1b1dad39e78d5ea32
Author: Heinz Graalfs <graalfs at linux.vnet.ibm.com>
Date:   Mon Oct 29 02:13:25 2012 +0000

    s390: sclp ascii console support
    
    This code adds console support  by implementing SCLP's ASCII Console
    Data event. This is the same console as LPARs ASCII console or z/VMs
    sysascii.
    
    The console can be specified manually with something like
    -chardev stdio,id=charconsole0 -device sclpconsole,chardev=charconsole0,id=console0
    
    Newer kernels will autodetect that console and prefer that over virtio
    console.
    
    When data is received from the character layer it creates a service
    interrupt to trigger a Read Event Data command from the guest that will
    pick up the received character byte-stream.
    When characters are echo'ed by the linux guest a Write Event Data occurs
    which is forwarded by the Event Facility to the console that supports
    a corresponding mask value.
    Console resizing is not supported.
    The character layer byte-stream is buffered using a fixed size iov
    buffer.
    
    Signed-off-by: Heinz Graalfs <graalfs at linux.vnet.ibm.com>
    Signed-off-by: Christian Borntraeger <borntraeger at de.ibm.com>
    Signed-off-by: Jens Freimann <jfrei at linux.vnet.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs
index ed4e61a..096dfcd 100644
--- a/hw/s390x/Makefile.objs
+++ b/hw/s390x/Makefile.objs
@@ -3,4 +3,4 @@ obj-y = s390-virtio-bus.o s390-virtio.o
 obj-y := $(addprefix ../,$(obj-y))
 obj-y += sclp.o
 obj-y += event-facility.o
-obj-y += sclpquiesce.o
+obj-y += sclpquiesce.o sclpconsole.o
diff --git a/hw/s390x/sclpconsole.c b/hw/s390x/sclpconsole.c
new file mode 100644
index 0000000..0ec5623
--- /dev/null
+++ b/hw/s390x/sclpconsole.c
@@ -0,0 +1,306 @@
+/*
+ * SCLP event type
+ *    Ascii Console Data (VT220 Console)
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ *  Heinz Graalfs <graalfs at de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at your
+ * option) any later version.  See the COPYING file in the top-level directory.
+ *
+ */
+
+#include <hw/qdev.h>
+#include "qemu-thread.h"
+
+#include "sclp.h"
+#include "event-facility.h"
+
+typedef struct ASCIIConsoleData {
+    EventBufferHeader ebh;
+    char data[0];
+} QEMU_PACKED ASCIIConsoleData;
+
+/* max size for ASCII data in 4K SCCB page */
+#define SIZE_BUFFER_VT220 4080
+
+typedef struct SCLPConsole {
+    SCLPEvent event;
+    CharDriverState *chr;
+    /* io vector                                                       */
+    uint8_t *iov;           /* iov buffer pointer                      */
+    uint8_t *iov_sclp;      /* pointer to SCLP read offset             */
+    uint8_t *iov_bs;        /* pointer byte stream read offset         */
+    uint32_t iov_data_len;  /* length of byte stream in buffer         */
+    uint32_t iov_sclp_rest; /* length of byte stream not read via SCLP */
+    qemu_irq irq_read_vt220;
+} SCLPConsole;
+
+/* character layer call-back functions */
+
+/* Return number of bytes that fit into iov buffer */
+static int chr_can_read(void *opaque)
+{
+    int can_read;
+    SCLPConsole *scon = opaque;
+
+    can_read = SIZE_BUFFER_VT220 - scon->iov_data_len;
+
+    return can_read;
+}
+
+/* Receive n bytes from character layer, save in iov buffer,
+ * and set event pending */
+static void receive_from_chr_layer(SCLPConsole *scon, const uint8_t *buf,
+                                   int size)
+{
+    assert(scon->iov);
+
+    /* read data must fit into current buffer */
+    assert(size <= SIZE_BUFFER_VT220 - scon->iov_data_len);
+
+    /* put byte-stream from character layer into buffer */
+    memcpy(scon->iov_bs, buf, size);
+    scon->iov_data_len += size;
+    scon->iov_sclp_rest += size;
+    scon->iov_bs += size;
+    scon->event.event_pending = true;
+}
+
+/* Send data from a char device over to the guest */
+static void chr_read(void *opaque, const uint8_t *buf, int size)
+{
+    SCLPConsole *scon = opaque;
+
+    assert(scon);
+
+    receive_from_chr_layer(scon, buf, size);
+    /* trigger SCLP read operation */
+    qemu_irq_raise(scon->irq_read_vt220);
+}
+
+static void chr_event(void *opaque, int event)
+{
+    SCLPConsole *scon = opaque;
+
+    switch (event) {
+    case CHR_EVENT_OPENED:
+        if (!scon->iov) {
+            scon->iov = g_malloc0(SIZE_BUFFER_VT220);
+            scon->iov_sclp = scon->iov;
+            scon->iov_bs = scon->iov;
+            scon->iov_data_len = 0;
+            scon->iov_sclp_rest = 0;
+        }
+        break;
+    case CHR_EVENT_CLOSED:
+        if (scon->iov) {
+            g_free(scon->iov);
+            scon->iov = NULL;
+        }
+        break;
+    }
+}
+
+/* functions to be called by event facility */
+
+static int event_type(void)
+{
+    return SCLP_EVENT_ASCII_CONSOLE_DATA;
+}
+
+static unsigned int send_mask(void)
+{
+    return SCLP_EVENT_MASK_MSG_ASCII;
+}
+
+static unsigned int receive_mask(void)
+{
+    return SCLP_EVENT_MASK_MSG_ASCII;
+}
+
+/* triggered by SCLP's read_event_data -
+ * copy console data byte-stream into provided (SCLP) buffer
+ */
+static void get_console_data(SCLPEvent *event, uint8_t *buf, size_t *size,
+                             int avail)
+{
+    SCLPConsole *cons = DO_UPCAST(SCLPConsole, event, event);
+
+    /* first byte is hex 0 saying an ascii string follows */
+    *buf++ = '\0';
+    avail--;
+    /* if all data fit into provided SCLP buffer */
+    if (avail >= cons->iov_sclp_rest) {
+        /* copy character byte-stream to SCLP buffer */
+        memcpy(buf, cons->iov_sclp, cons->iov_sclp_rest);
+        *size = cons->iov_sclp_rest + 1;
+        cons->iov_sclp = cons->iov;
+        cons->iov_bs = cons->iov;
+        cons->iov_data_len = 0;
+        cons->iov_sclp_rest = 0;
+        event->event_pending = false;
+        /* data provided and no more data pending */
+    } else {
+        /* if provided buffer is too small, just copy part */
+        memcpy(buf, cons->iov_sclp, avail);
+        *size = avail + 1;
+        cons->iov_sclp_rest -= avail;
+        cons->iov_sclp += avail;
+        /* more data pending */
+    }
+}
+
+static int read_event_data(SCLPEvent *event, EventBufferHeader *evt_buf_hdr,
+                           int *slen)
+{
+    int avail;
+    size_t src_len;
+    uint8_t *to;
+    ASCIIConsoleData *acd = (ASCIIConsoleData *) evt_buf_hdr;
+
+    if (!event->event_pending) {
+        /* no data pending */
+        return 0;
+    }
+
+    to = (uint8_t *)&acd->data;
+    avail = *slen - sizeof(ASCIIConsoleData);
+    get_console_data(event, to, &src_len, avail);
+
+    acd->ebh.length = cpu_to_be16(sizeof(ASCIIConsoleData) + src_len);
+    acd->ebh.type = SCLP_EVENT_ASCII_CONSOLE_DATA;
+    acd->ebh.flags |= SCLP_EVENT_BUFFER_ACCEPTED;
+    *slen = avail - src_len;
+
+    return 1;
+}
+
+/* triggered by SCLP's write_event_data
+ *  - write console data into character layer
+ *  returns < 0 if an error occured
+ */
+static ssize_t write_console_data(SCLPEvent *event, const uint8_t *buf,
+                                  size_t len)
+{
+    ssize_t ret = 0;
+    const uint8_t *iov_offset;
+    SCLPConsole *scon = DO_UPCAST(SCLPConsole, event, event);
+
+    if (!scon->chr) {
+        /* If there's no backend, we can just say we consumed all data. */
+        return len;
+    }
+
+    iov_offset = buf;
+    while (len > 0) {
+        ret = qemu_chr_fe_write(scon->chr, buf, len);
+        if (ret == 0) {
+            /* a pty doesn't seem to be connected - no error */
+            len = 0;
+        } else if (ret == -EAGAIN || (ret > 0 && ret < len)) {
+            len -= ret;
+            iov_offset += ret;
+        } else {
+            len = 0;
+        }
+    }
+
+    return ret;
+}
+
+static int write_event_data(SCLPEvent *event, EventBufferHeader *evt_buf_hdr)
+{
+    int rc;
+    int length;
+    ssize_t written;
+    ASCIIConsoleData *acd = (ASCIIConsoleData *) evt_buf_hdr;
+
+    length = be16_to_cpu(evt_buf_hdr->length) - sizeof(EventBufferHeader);
+    written = write_console_data(event, (uint8_t *)acd->data, length);
+
+    rc = SCLP_RC_NORMAL_COMPLETION;
+    /* set event buffer accepted flag */
+    evt_buf_hdr->flags |= SCLP_EVENT_BUFFER_ACCEPTED;
+
+    /* written will be zero if a pty is not connected - don't treat as error */
+    if (written < 0) {
+        /* event buffer not accepted due to error in character layer */
+        evt_buf_hdr->flags &= ~(SCLP_EVENT_BUFFER_ACCEPTED);
+        rc = SCLP_RC_CONTAINED_EQUIPMENT_CHECK;
+    }
+
+    return rc;
+}
+
+static void trigger_ascii_console_data(void *env, int n, int level)
+{
+    sclp_service_interrupt(0);
+}
+
+/* qemu object creation and initialization functions */
+
+/* tell character layer our call-back functions */
+static int console_init(SCLPEvent *event)
+{
+    static bool console_available;
+
+    SCLPConsole *scon = DO_UPCAST(SCLPConsole, event, event);
+
+    if (console_available) {
+        error_report("Multiple VT220 operator consoles are not supported");
+        return -1;
+    }
+    console_available = true;
+    event->event_type = SCLP_EVENT_ASCII_CONSOLE_DATA;
+    if (scon->chr) {
+        qemu_chr_add_handlers(scon->chr, chr_can_read,
+                              chr_read, chr_event, scon);
+    }
+    scon->irq_read_vt220 = *qemu_allocate_irqs(trigger_ascii_console_data,
+                                               NULL, 1);
+
+    return 0;
+}
+
+static int console_exit(SCLPEvent *event)
+{
+    return 0;
+}
+
+static Property console_properties[] = {
+    DEFINE_PROP_CHR("chardev", SCLPConsole, chr),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void console_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SCLPEventClass *ec = SCLP_EVENT_CLASS(klass);
+
+    dc->props = console_properties;
+    ec->init = console_init;
+    ec->exit = console_exit;
+    ec->get_send_mask = send_mask;
+    ec->get_receive_mask = receive_mask;
+    ec->event_type = event_type;
+    ec->read_event_data = read_event_data;
+    ec->write_event_data = write_event_data;
+}
+
+static TypeInfo sclp_console_info = {
+    .name          = "sclpconsole",
+    .parent        = TYPE_SCLP_EVENT,
+    .instance_size = sizeof(SCLPConsole),
+    .class_init    = console_class_init,
+    .class_size    = sizeof(SCLPEventClass),
+};
+
+static void register_types(void)
+{
+    type_register_static(&sclp_console_info);
+}
+
+type_init(register_types)
commit ab9074b559823a22822db9e299198c53169c34ea
Author: Heinz Graalfs <graalfs at linux.vnet.ibm.com>
Date:   Mon Oct 29 02:13:24 2012 +0000

    s390: sclp signal quiesce support
    
    This implements the sclp signal quiesce event via the SCLP Event
    Facility.
    This allows to gracefully shutdown a guest by using system_powerdown
    notifiers. It creates a service interrupt that will trigger a
    Read Event Data command from the guest. This code will then add an
    event that is interpreted by linux guests as ctrl-alt-del.
    
    Signed-off-by: Heinz Graalfs <graalfs at linux.vnet.ibm.com>
    Signed-off-by: Christian Borntraeger <borntraeger at de.ibm.com>
    Signed-off-by: Jens Freimann <jfrei at linux.vnet.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs
index b32fc52..ed4e61a 100644
--- a/hw/s390x/Makefile.objs
+++ b/hw/s390x/Makefile.objs
@@ -3,3 +3,4 @@ obj-y = s390-virtio-bus.o s390-virtio.o
 obj-y := $(addprefix ../,$(obj-y))
 obj-y += sclp.o
 obj-y += event-facility.o
+obj-y += sclpquiesce.o
diff --git a/hw/s390x/event-facility.c b/hw/s390x/event-facility.c
index 1108e2d..9367660 100644
--- a/hw/s390x/event-facility.c
+++ b/hw/s390x/event-facility.c
@@ -315,6 +315,7 @@ static void command_handler(SCLPEventFacility *ef, SCCB *sccb, uint64_t code)
 static int init_event_facility(S390SCLPDevice *sdev)
 {
     SCLPEventFacility *event_facility;
+    DeviceState *quiesce;
 
     event_facility = g_malloc0(sizeof(SCLPEventFacility));
     sdev->ef = event_facility;
@@ -327,6 +328,12 @@ static int init_event_facility(S390SCLPDevice *sdev)
     event_facility->sbus.qbus.allow_hotplug = 0;
     event_facility->qdev = (DeviceState *) sdev;
 
+    quiesce = qdev_create(&event_facility->sbus.qbus, "sclpquiesce");
+    if (!quiesce) {
+        return -1;
+    }
+    qdev_init_nofail(quiesce);
+
     return 0;
 }
 
diff --git a/hw/s390x/sclpquiesce.c b/hw/s390x/sclpquiesce.c
new file mode 100644
index 0000000..9a773b8
--- /dev/null
+++ b/hw/s390x/sclpquiesce.c
@@ -0,0 +1,123 @@
+/*
+ * SCLP event type
+ *    Signal Quiesce - trigger system powerdown request
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ *  Heinz Graalfs <graalfs at de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at your
+ * option) any later version.  See the COPYING file in the top-level directory.
+ *
+ */
+#include <hw/qdev.h>
+#include "sysemu.h"
+#include "sclp.h"
+#include "event-facility.h"
+
+typedef struct SignalQuiesce {
+    EventBufferHeader ebh;
+    uint16_t timeout;
+    uint8_t unit;
+} QEMU_PACKED SignalQuiesce;
+
+static int event_type(void)
+{
+    return SCLP_EVENT_SIGNAL_QUIESCE;
+}
+
+static unsigned int send_mask(void)
+{
+    return SCLP_EVENT_MASK_SIGNAL_QUIESCE;
+}
+
+static unsigned int receive_mask(void)
+{
+    return 0;
+}
+
+static int read_event_data(SCLPEvent *event, EventBufferHeader *evt_buf_hdr,
+                           int *slen)
+{
+    SignalQuiesce *sq = (SignalQuiesce *) evt_buf_hdr;
+
+    if (*slen < sizeof(SignalQuiesce)) {
+        return 0;
+    }
+
+    if (!event->event_pending) {
+        return 0;
+    }
+    event->event_pending = false;
+
+    sq->ebh.length = cpu_to_be16(sizeof(SignalQuiesce));
+    sq->ebh.type = SCLP_EVENT_SIGNAL_QUIESCE;
+    sq->ebh.flags |= SCLP_EVENT_BUFFER_ACCEPTED;
+    /*
+     * system_powerdown does not have a timeout. Fortunately the
+     * timeout value is currently ignored by Linux, anyway
+     */
+    sq->timeout = cpu_to_be16(0);
+    sq->unit = cpu_to_be16(0);
+    *slen -= sizeof(SignalQuiesce);
+
+    return 1;
+}
+
+typedef struct QuiesceNotifier QuiesceNotifier;
+
+static struct QuiesceNotifier {
+    Notifier notifier;
+    SCLPEvent *event;
+} qn;
+
+static void quiesce_powerdown_req(Notifier *n, void *opaque)
+{
+    QuiesceNotifier *qn = container_of(n, QuiesceNotifier, notifier);
+    SCLPEvent *event = qn->event;
+
+    event->event_pending = true;
+    /* trigger SCLP read operation */
+    sclp_service_interrupt(0);
+}
+
+static int quiesce_init(SCLPEvent *event)
+{
+    event->event_type = SCLP_EVENT_SIGNAL_QUIESCE;
+
+    qn.notifier.notify = quiesce_powerdown_req;
+    qn.event = event;
+
+    qemu_register_powerdown_notifier(&qn.notifier);
+
+    return 0;
+}
+
+static void quiesce_class_init(ObjectClass *klass, void *data)
+{
+    SCLPEventClass *k = SCLP_EVENT_CLASS(klass);
+
+    k->init = quiesce_init;
+
+    k->get_send_mask = send_mask;
+    k->get_receive_mask = receive_mask;
+    k->event_type = event_type;
+    k->read_event_data = read_event_data;
+    k->write_event_data = NULL;
+}
+
+static TypeInfo sclp_quiesce_info = {
+    .name          = "sclpquiesce",
+    .parent        = TYPE_SCLP_EVENT,
+    .instance_size = sizeof(SCLPEvent),
+    .class_init    = quiesce_class_init,
+    .class_size    = sizeof(SCLPEventClass),
+};
+
+static void register_types(void)
+{
+    type_register_static(&sclp_quiesce_info);
+}
+
+type_init(register_types)
commit 559a17a1439e34a95dcab47ef99022bcd0e8f8e7
Author: Heinz Graalfs <graalfs at linux.vnet.ibm.com>
Date:   Mon Oct 29 02:13:23 2012 +0000

    s390: sclp event support
    
    Several SCLP features are considered to be events. Those events don't
    provide SCLP commands on their own, instead they are all based on
    Read Event Data, Write Event Data, Write Event Mask and the service
    interrupt. Follow-on patches will provide SCLP's Signal Quiesce (via
    system_powerdown) and the ASCII console.
    Further down the road the sclp line mode console and configuration
    change events (e.g. cpu hotplug) can be implemented.
    
    Signed-off-by: Heinz Graalfs <graalfs at linux.vnet.ibm.com>
    Signed-off-by: Christian Borntraeger <borntraeger at de.ibm.com>
    Signed-off-by: Jens Freimann <jfrei at linux.vnet.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/s390-virtio.c b/hw/s390-virtio.c
index 52fad37..685cb54 100644
--- a/hw/s390-virtio.c
+++ b/hw/s390-virtio.c
@@ -32,6 +32,7 @@
 #include "exec-memory.h"
 
 #include "hw/s390-virtio-bus.h"
+#include "hw/s390x/sclp.h"
 
 //#define DEBUG_S390
 
@@ -184,6 +185,7 @@ static void s390_init(QEMUMachineInitArgs *args)
 
     /* get a BUS */
     s390_bus = s390_virtio_bus_init(&my_ram_size);
+    s390_sclp_init();
 
     /* allocate RAM */
     memory_region_init_ram(ram, "s390.ram", my_ram_size);
diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs
index 1c14b96..b32fc52 100644
--- a/hw/s390x/Makefile.objs
+++ b/hw/s390x/Makefile.objs
@@ -2,3 +2,4 @@ obj-y = s390-virtio-bus.o s390-virtio.o
 
 obj-y := $(addprefix ../,$(obj-y))
 obj-y += sclp.o
+obj-y += event-facility.o
diff --git a/hw/s390x/event-facility.c b/hw/s390x/event-facility.c
new file mode 100644
index 0000000..1108e2d
--- /dev/null
+++ b/hw/s390x/event-facility.c
@@ -0,0 +1,391 @@
+/*
+ * SCLP
+ *    Event Facility
+ *       handles SCLP event types
+ *          - Signal Quiesce - system power down
+ *          - ASCII Console Data - VT220 read and write
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ *  Heinz Graalfs <graalfs at de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at your
+ * option) any later version.  See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "monitor.h"
+#include "sysemu.h"
+
+#include "sclp.h"
+#include "event-facility.h"
+
+typedef struct EventTypesBus {
+    BusState qbus;
+} EventTypesBus;
+
+struct SCLPEventFacility {
+    EventTypesBus sbus;
+    DeviceState *qdev;
+    /* guest' receive mask */
+    unsigned int receive_mask;
+};
+
+/* return true if any child has event pending set */
+static bool event_pending(SCLPEventFacility *ef)
+{
+    BusChild *kid;
+    SCLPEvent *event;
+    SCLPEventClass *event_class;
+
+    QTAILQ_FOREACH(kid, &ef->sbus.qbus.children, sibling) {
+        DeviceState *qdev = kid->child;
+        event = DO_UPCAST(SCLPEvent, qdev, qdev);
+        event_class = SCLP_EVENT_GET_CLASS(event);
+        if (event->event_pending &&
+            event_class->get_send_mask() & ef->receive_mask) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static unsigned int get_host_send_mask(SCLPEventFacility *ef)
+{
+    unsigned int mask;
+    BusChild *kid;
+    SCLPEventClass *child;
+
+    mask = 0;
+
+    QTAILQ_FOREACH(kid, &ef->sbus.qbus.children, sibling) {
+        DeviceState *qdev = kid->child;
+        child = SCLP_EVENT_GET_CLASS((SCLPEvent *) qdev);
+        mask |= child->get_send_mask();
+    }
+    return mask;
+}
+
+static unsigned int get_host_receive_mask(SCLPEventFacility *ef)
+{
+    unsigned int mask;
+    BusChild *kid;
+    SCLPEventClass *child;
+
+    mask = 0;
+
+    QTAILQ_FOREACH(kid, &ef->sbus.qbus.children, sibling) {
+        DeviceState *qdev = kid->child;
+        child = SCLP_EVENT_GET_CLASS((SCLPEvent *) qdev);
+        mask |= child->get_receive_mask();
+    }
+    return mask;
+}
+
+static uint16_t write_event_length_check(SCCB *sccb)
+{
+    int slen;
+    unsigned elen = 0;
+    EventBufferHeader *event;
+    WriteEventData *wed = (WriteEventData *) sccb;
+
+    event = (EventBufferHeader *) &wed->ebh;
+    for (slen = sccb_data_len(sccb); slen > 0; slen -= elen) {
+        elen = be16_to_cpu(event->length);
+        if (elen < sizeof(*event) || elen > slen) {
+            return SCLP_RC_EVENT_BUFFER_SYNTAX_ERROR;
+        }
+        event = (void *) event + elen;
+    }
+    if (slen) {
+        return SCLP_RC_INCONSISTENT_LENGTHS;
+    }
+    return SCLP_RC_NORMAL_COMPLETION;
+}
+
+static uint16_t handle_write_event_buf(SCLPEventFacility *ef,
+                                       EventBufferHeader *event_buf, SCCB *sccb)
+{
+    uint16_t rc;
+    BusChild *kid;
+    SCLPEvent *event;
+    SCLPEventClass *ec;
+
+    QTAILQ_FOREACH(kid, &ef->sbus.qbus.children, sibling) {
+        DeviceState *qdev = kid->child;
+        event = (SCLPEvent *) qdev;
+        ec = SCLP_EVENT_GET_CLASS(event);
+
+        rc = SCLP_RC_INVALID_FUNCTION;
+        if (ec->write_event_data &&
+            ec->event_type() == event_buf->type) {
+            rc = ec->write_event_data(event, event_buf);
+            break;
+        }
+    }
+    return rc;
+}
+
+static uint16_t handle_sccb_write_events(SCLPEventFacility *ef, SCCB *sccb)
+{
+    uint16_t rc;
+    int slen;
+    unsigned elen = 0;
+    EventBufferHeader *event_buf;
+    WriteEventData *wed = (WriteEventData *) sccb;
+
+    event_buf = &wed->ebh;
+    rc = SCLP_RC_NORMAL_COMPLETION;
+
+    /* loop over all contained event buffers */
+    for (slen = sccb_data_len(sccb); slen > 0; slen -= elen) {
+        elen = be16_to_cpu(event_buf->length);
+
+        /* in case of a previous error mark all trailing buffers
+         * as not accepted */
+        if (rc != SCLP_RC_NORMAL_COMPLETION) {
+            event_buf->flags &= ~(SCLP_EVENT_BUFFER_ACCEPTED);
+        } else {
+            rc = handle_write_event_buf(ef, event_buf, sccb);
+        }
+        event_buf = (void *) event_buf + elen;
+    }
+    return rc;
+}
+
+static void write_event_data(SCLPEventFacility *ef, SCCB *sccb)
+{
+    if (sccb->h.function_code != SCLP_FC_NORMAL_WRITE) {
+        sccb->h.response_code = cpu_to_be16(SCLP_RC_INVALID_FUNCTION);
+        goto out;
+    }
+    if (be16_to_cpu(sccb->h.length) < 8) {
+        sccb->h.response_code = cpu_to_be16(SCLP_RC_INSUFFICIENT_SCCB_LENGTH);
+        goto out;
+    }
+    /* first do a sanity check of the write events */
+    sccb->h.response_code = cpu_to_be16(write_event_length_check(sccb));
+
+    /* if no early error, then execute */
+    if (sccb->h.response_code == be16_to_cpu(SCLP_RC_NORMAL_COMPLETION)) {
+        sccb->h.response_code =
+                cpu_to_be16(handle_sccb_write_events(ef, sccb));
+    }
+
+out:
+    return;
+}
+
+static uint16_t handle_sccb_read_events(SCLPEventFacility *ef, SCCB *sccb,
+                                        unsigned int mask)
+{
+    uint16_t rc;
+    int slen;
+    unsigned elen = 0;
+    BusChild *kid;
+    SCLPEvent *event;
+    SCLPEventClass *ec;
+    EventBufferHeader *event_buf;
+    ReadEventData *red = (ReadEventData *) sccb;
+
+    event_buf = &red->ebh;
+    event_buf->length = 0;
+    slen = sizeof(sccb->data);
+
+    rc = SCLP_RC_NO_EVENT_BUFFERS_STORED;
+
+    QTAILQ_FOREACH(kid, &ef->sbus.qbus.children, sibling) {
+        DeviceState *qdev = kid->child;
+        event = (SCLPEvent *) qdev;
+        ec = SCLP_EVENT_GET_CLASS(event);
+
+        if (mask & ec->get_send_mask()) {
+            if (ec->read_event_data(event, event_buf, &slen)) {
+                rc = SCLP_RC_NORMAL_COMPLETION;
+            }
+        }
+        elen = be16_to_cpu(event_buf->length);
+        event_buf = (void *) event_buf + elen;
+    }
+
+    if (sccb->h.control_mask[2] & SCLP_VARIABLE_LENGTH_RESPONSE) {
+        /* architecture suggests to reset variable-length-response bit */
+        sccb->h.control_mask[2] &= ~SCLP_VARIABLE_LENGTH_RESPONSE;
+        /* with a new length value */
+        sccb->h.length = cpu_to_be16(SCCB_SIZE - slen);
+    }
+    return rc;
+}
+
+static void read_event_data(SCLPEventFacility *ef, SCCB *sccb)
+{
+    unsigned int sclp_active_selection_mask;
+    unsigned int sclp_cp_receive_mask;
+
+    ReadEventData *red = (ReadEventData *) sccb;
+
+    if (be16_to_cpu(sccb->h.length) != SCCB_SIZE) {
+        sccb->h.response_code = cpu_to_be16(SCLP_RC_INSUFFICIENT_SCCB_LENGTH);
+        goto out;
+    }
+
+    sclp_cp_receive_mask = ef->receive_mask;
+
+    /* get active selection mask */
+    switch (sccb->h.function_code) {
+    case SCLP_UNCONDITIONAL_READ:
+        sclp_active_selection_mask = sclp_cp_receive_mask;
+        break;
+    case SCLP_SELECTIVE_READ:
+        if (!(sclp_cp_receive_mask & be32_to_cpu(red->mask))) {
+            sccb->h.response_code =
+                    cpu_to_be16(SCLP_RC_INVALID_SELECTION_MASK);
+            goto out;
+        }
+        sclp_active_selection_mask = be32_to_cpu(red->mask);
+        break;
+    default:
+        sccb->h.response_code = cpu_to_be16(SCLP_RC_INVALID_FUNCTION);
+        goto out;
+    }
+    sccb->h.response_code = cpu_to_be16(
+            handle_sccb_read_events(ef, sccb, sclp_active_selection_mask));
+
+out:
+    return;
+}
+
+static void write_event_mask(SCLPEventFacility *ef, SCCB *sccb)
+{
+    WriteEventMask *we_mask = (WriteEventMask *) sccb;
+
+    /* Attention: We assume that Linux uses 4-byte masks, what it actually
+       does. Architecture allows for masks of variable size, though */
+    if (be16_to_cpu(we_mask->mask_length) != 4) {
+        sccb->h.response_code = cpu_to_be16(SCLP_RC_INVALID_MASK_LENGTH);
+        goto out;
+    }
+
+    /* keep track of the guest's capability masks */
+    ef->receive_mask = be32_to_cpu(we_mask->cp_receive_mask);
+
+    /* return the SCLP's capability masks to the guest */
+    we_mask->send_mask = cpu_to_be32(get_host_send_mask(ef));
+    we_mask->receive_mask = cpu_to_be32(get_host_receive_mask(ef));
+
+    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
+
+out:
+    return;
+}
+
+/* qemu object creation and initialization functions */
+
+#define TYPE_SCLP_EVENTS_BUS "s390-sclp-events-bus"
+
+static void sclp_events_bus_class_init(ObjectClass *klass, void *data)
+{
+}
+
+static const TypeInfo s390_sclp_events_bus_info = {
+    .name = TYPE_SCLP_EVENTS_BUS,
+    .parent = TYPE_BUS,
+    .class_init = sclp_events_bus_class_init,
+};
+
+static void command_handler(SCLPEventFacility *ef, SCCB *sccb, uint64_t code)
+{
+    switch (code) {
+    case SCLP_CMD_READ_EVENT_DATA:
+        read_event_data(ef, sccb);
+        break;
+    case SCLP_CMD_WRITE_EVENT_DATA:
+        write_event_data(ef, sccb);
+        break;
+    case SCLP_CMD_WRITE_EVENT_MASK:
+        write_event_mask(ef, sccb);
+        break;
+    default:
+        sccb->h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND);
+        break;
+    }
+}
+
+static int init_event_facility(S390SCLPDevice *sdev)
+{
+    SCLPEventFacility *event_facility;
+
+    event_facility = g_malloc0(sizeof(SCLPEventFacility));
+    sdev->ef = event_facility;
+    sdev->sclp_command_handler = command_handler;
+    sdev->event_pending = event_pending;
+
+    /* Spawn a new sclp-events facility */
+    qbus_create_inplace(&event_facility->sbus.qbus,
+                        TYPE_SCLP_EVENTS_BUS, (DeviceState *)sdev, NULL);
+    event_facility->sbus.qbus.allow_hotplug = 0;
+    event_facility->qdev = (DeviceState *) sdev;
+
+    return 0;
+}
+
+static void init_event_facility_class(ObjectClass *klass, void *data)
+{
+    S390SCLPDeviceClass *k = SCLP_S390_DEVICE_CLASS(klass);
+
+    k->init = init_event_facility;
+}
+
+static TypeInfo s390_sclp_event_facility_info = {
+    .name          = "s390-sclp-event-facility",
+    .parent        = TYPE_DEVICE_S390_SCLP,
+    .instance_size = sizeof(S390SCLPDevice),
+    .class_init    = init_event_facility_class,
+};
+
+static int event_qdev_init(DeviceState *qdev)
+{
+    SCLPEvent *event = DO_UPCAST(SCLPEvent, qdev, qdev);
+    SCLPEventClass *child = SCLP_EVENT_GET_CLASS(event);
+
+    return child->init(event);
+}
+
+static int event_qdev_exit(DeviceState *qdev)
+{
+    SCLPEvent *event = DO_UPCAST(SCLPEvent, qdev, qdev);
+    SCLPEventClass *child = SCLP_EVENT_GET_CLASS(event);
+    if (child->exit) {
+        child->exit(event);
+    }
+    return 0;
+}
+
+static void event_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->bus_type = TYPE_SCLP_EVENTS_BUS;
+    dc->unplug = qdev_simple_unplug_cb;
+    dc->init = event_qdev_init;
+    dc->exit = event_qdev_exit;
+}
+
+static TypeInfo s390_sclp_event_type_info = {
+    .name = TYPE_SCLP_EVENT,
+    .parent = TYPE_DEVICE,
+    .instance_size = sizeof(SCLPEvent),
+    .class_init = event_class_init,
+    .class_size = sizeof(SCLPEventClass),
+    .abstract = true,
+};
+
+static void register_types(void)
+{
+    type_register_static(&s390_sclp_events_bus_info);
+    type_register_static(&s390_sclp_event_facility_info);
+    type_register_static(&s390_sclp_event_type_info);
+}
+
+type_init(register_types)
diff --git a/hw/s390x/event-facility.h b/hw/s390x/event-facility.h
new file mode 100644
index 0000000..30af0a7
--- /dev/null
+++ b/hw/s390x/event-facility.h
@@ -0,0 +1,96 @@
+/*
+ * SCLP
+ *    Event Facility definitions
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ *  Heinz Graalfs <graalfs at de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at your
+ * option) any later version.  See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef HW_S390_SCLP_EVENT_FACILITY_H
+#define HW_S390_SCLP_EVENT_FACILITY_H
+
+#include <hw/qdev.h>
+#include "qemu-thread.h"
+
+/* SCLP event types */
+#define SCLP_EVENT_ASCII_CONSOLE_DATA           0x1a
+#define SCLP_EVENT_SIGNAL_QUIESCE               0x1d
+
+/* SCLP event masks */
+#define SCLP_EVENT_MASK_SIGNAL_QUIESCE          0x00000008
+#define SCLP_EVENT_MASK_MSG_ASCII               0x00000040
+
+#define SCLP_UNCONDITIONAL_READ                 0x00
+#define SCLP_SELECTIVE_READ                     0x01
+
+#define TYPE_SCLP_EVENT "s390-sclp-event-type"
+#define SCLP_EVENT(obj) \
+     OBJECT_CHECK(SCLPEvent, (obj), TYPE_SCLP_EVENT)
+#define SCLP_EVENT_CLASS(klass) \
+     OBJECT_CLASS_CHECK(SCLPEventClass, (klass), TYPE_SCLP_EVENT)
+#define SCLP_EVENT_GET_CLASS(obj) \
+     OBJECT_GET_CLASS(SCLPEventClass, (obj), TYPE_SCLP_EVENT)
+
+typedef struct WriteEventMask {
+    SCCBHeader h;
+    uint16_t _reserved;
+    uint16_t mask_length;
+    uint32_t cp_receive_mask;
+    uint32_t cp_send_mask;
+    uint32_t send_mask;
+    uint32_t receive_mask;
+} QEMU_PACKED WriteEventMask;
+
+typedef struct EventBufferHeader {
+    uint16_t length;
+    uint8_t  type;
+    uint8_t  flags;
+    uint16_t _reserved;
+} QEMU_PACKED EventBufferHeader;
+
+typedef struct WriteEventData {
+    SCCBHeader h;
+    EventBufferHeader ebh;
+} QEMU_PACKED WriteEventData;
+
+typedef struct ReadEventData {
+    SCCBHeader h;
+    EventBufferHeader ebh;
+    uint32_t mask;
+} QEMU_PACKED ReadEventData;
+
+typedef struct SCLPEvent {
+    DeviceState qdev;
+    bool event_pending;
+    uint32_t event_type;
+    char *name;
+} SCLPEvent;
+
+typedef struct SCLPEventClass {
+    DeviceClass parent_class;
+    int (*init)(SCLPEvent *event);
+    int (*exit)(SCLPEvent *event);
+
+    /* get SCLP's send mask */
+    unsigned int (*get_send_mask)(void);
+
+    /* get SCLP's receive mask */
+    unsigned int (*get_receive_mask)(void);
+
+    int (*read_event_data)(SCLPEvent *event, EventBufferHeader *evt_buf_hdr,
+                           int *slen);
+
+    int (*write_event_data)(SCLPEvent *event, EventBufferHeader *evt_buf_hdr);
+
+    /* returns the supported event type */
+    int (*event_type)(void);
+
+} SCLPEventClass;
+
+#endif
diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
index d902a66..5c274fa 100644
--- a/hw/s390x/sclp.c
+++ b/hw/s390x/sclp.c
@@ -18,6 +18,15 @@
 
 #include "sclp.h"
 
+static inline S390SCLPDevice *get_event_facility(void)
+{
+    ObjectProperty *op = object_property_find(qdev_get_machine(),
+                                              "s390-sclp-event-facility",
+                                              NULL);
+    assert(op);
+    return op->opaque;
+}
+
 /* Provide information about the configuration, CPUs and storage */
 static void read_SCP_info(SCCB *sccb)
 {
@@ -34,13 +43,15 @@ static void read_SCP_info(SCCB *sccb)
 
 static void sclp_execute(SCCB *sccb, uint64_t code)
 {
+    S390SCLPDevice *sdev = get_event_facility();
+
     switch (code) {
     case SCLP_CMDW_READ_SCP_INFO:
     case SCLP_CMDW_READ_SCP_INFO_FORCED:
         read_SCP_info(sccb);
         break;
     default:
-        sccb->h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND);
+        sdev->sclp_command_handler(sdev->ef, sccb, code);
         break;
     }
 }
@@ -89,11 +100,45 @@ out:
 
 void sclp_service_interrupt(uint32_t sccb)
 {
-    s390_sclp_extint(sccb & ~3);
+    S390SCLPDevice *sdev = get_event_facility();
+    uint32_t param = sccb & ~3;
+
+    /* Indicate whether an event is still pending */
+    param |= sdev->event_pending(sdev->ef) ? 1 : 0;
+
+    if (!param) {
+        /* No need to send an interrupt, there's nothing to be notified about */
+        return;
+    }
+    s390_sclp_extint(param);
 }
 
 /* qemu object creation and initialization functions */
 
+void s390_sclp_init(void)
+{
+    DeviceState *dev  = qdev_create(NULL, "s390-sclp-event-facility");
+
+    object_property_add_child(qdev_get_machine(), "s390-sclp-event-facility",
+                              OBJECT(dev), NULL);
+    qdev_init_nofail(dev);
+}
+
+static int s390_sclp_dev_init(SysBusDevice *dev)
+{
+    int r;
+    S390SCLPDevice *sdev = (S390SCLPDevice *)dev;
+    S390SCLPDeviceClass *sclp = SCLP_S390_DEVICE_GET_CLASS(dev);
+
+    r = sclp->init(sdev);
+    if (!r) {
+        assert(sdev->event_pending);
+        assert(sdev->sclp_command_handler);
+    }
+
+    return r;
+}
+
 static void s390_sclp_device_class_init(ObjectClass *klass, void *data)
 {
     SysBusDeviceClass *dc = SYS_BUS_DEVICE_CLASS(klass);
diff --git a/hw/s390x/sclp.h b/hw/s390x/sclp.h
index e9ad42b..fe89dad 100644
--- a/hw/s390x/sclp.h
+++ b/hw/s390x/sclp.h
@@ -20,15 +20,35 @@
 /* SCLP command codes */
 #define SCLP_CMDW_READ_SCP_INFO                 0x00020001
 #define SCLP_CMDW_READ_SCP_INFO_FORCED          0x00120001
+#define SCLP_CMD_READ_EVENT_DATA                0x00770005
+#define SCLP_CMD_WRITE_EVENT_DATA               0x00760005
+#define SCLP_CMD_READ_EVENT_DATA                0x00770005
+#define SCLP_CMD_WRITE_EVENT_DATA               0x00760005
+#define SCLP_CMD_WRITE_EVENT_MASK               0x00780005
 
 /* SCLP response codes */
 #define SCLP_RC_NORMAL_READ_COMPLETION          0x0010
+#define SCLP_RC_NORMAL_COMPLETION               0x0020
 #define SCLP_RC_INVALID_SCLP_COMMAND            0x01f0
+#define SCLP_RC_CONTAINED_EQUIPMENT_CHECK       0x0340
+#define SCLP_RC_INSUFFICIENT_SCCB_LENGTH        0x0300
+#define SCLP_RC_INVALID_FUNCTION                0x40f0
+#define SCLP_RC_NO_EVENT_BUFFERS_STORED         0x60f0
+#define SCLP_RC_INVALID_SELECTION_MASK          0x70f0
+#define SCLP_RC_INCONSISTENT_LENGTHS            0x72f0
+#define SCLP_RC_EVENT_BUFFER_SYNTAX_ERROR       0x73f0
+#define SCLP_RC_INVALID_MASK_LENGTH             0x74f0
+
 
 /* Service Call Control Block (SCCB) and its elements */
 
 #define SCCB_SIZE 4096
 
+#define SCLP_VARIABLE_LENGTH_RESPONSE           0x80
+#define SCLP_EVENT_BUFFER_ACCEPTED              0x80
+
+#define SCLP_FC_NORMAL_WRITE                    0
+
 /*
  * Normally packed structures are not the right thing to do, since all code
  * must take care of endianess. We cant use ldl_phys and friends for two
@@ -62,8 +82,29 @@ typedef struct SCCB {
     char data[SCCB_DATA_LEN];
  } QEMU_PACKED SCCB;
 
+static inline int sccb_data_len(SCCB *sccb)
+{
+    return be16_to_cpu(sccb->h.length) - sizeof(sccb->h);
+}
+
+#define TYPE_DEVICE_S390_SCLP "s390-sclp-device"
+#define SCLP_S390_DEVICE(obj) \
+     OBJECT_CHECK(S390SCLPDevice, (obj), TYPE_DEVICE_S390_SCLP)
+#define SCLP_S390_DEVICE_CLASS(klass) \
+     OBJECT_CLASS_CHECK(S390SCLPDeviceClass, (klass), \
+             TYPE_DEVICE_S390_SCLP)
+#define SCLP_S390_DEVICE_GET_CLASS(obj) \
+     OBJECT_GET_CLASS(S390SCLPDeviceClass, (obj), \
+             TYPE_DEVICE_S390_SCLP)
+
+typedef struct SCLPEventFacility SCLPEventFacility;
+
 typedef struct S390SCLPDevice {
     SysBusDevice busdev;
+    SCLPEventFacility *ef;
+    void (*sclp_command_handler)(SCLPEventFacility *ef, SCCB *sccb,
+                                 uint64_t code);
+    bool (*event_pending)(SCLPEventFacility *ef);
 } S390SCLPDevice;
 
 typedef struct S390SCLPDeviceClass {
@@ -71,6 +112,7 @@ typedef struct S390SCLPDeviceClass {
     int (*init)(S390SCLPDevice *sdev);
 } S390SCLPDeviceClass;
 
+void s390_sclp_init(void);
 void sclp_service_interrupt(uint32_t sccb);
 
 #endif
commit f6c98f9286c69cafe108b7e9fb22c2ff5da7d6a9
Author: Heinz Graalfs <graalfs at linux.vnet.ibm.com>
Date:   Mon Oct 29 02:13:22 2012 +0000

    s390: sclp base support
    
    This adds a more generic infrastructure for handling Service-Call
    requests on s390. Currently we only support a small subset of Read
    SCP Info directly in target-s390x. This patch provides the base
    infrastructure for supporting more commands and moves Read SCP
    Info.
    In the future we could add additional commands for hotplug, call
    home and event handling.
    
    Signed-off-by: Heinz Graalfs <graalfs at linux.vnet.ibm.com>
    Signed-off-by: Christian Borntraeger <borntraeger at de.ibm.com>
    Signed-off-by: Jens Freimann <jfrei at linux.vnet.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/s390x/Makefile.objs b/hw/s390x/Makefile.objs
index dcdcac8..1c14b96 100644
--- a/hw/s390x/Makefile.objs
+++ b/hw/s390x/Makefile.objs
@@ -1,3 +1,4 @@
 obj-y = s390-virtio-bus.o s390-virtio.o
 
 obj-y := $(addprefix ../,$(obj-y))
+obj-y += sclp.o
diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
new file mode 100644
index 0000000..d902a66
--- /dev/null
+++ b/hw/s390x/sclp.c
@@ -0,0 +1,118 @@
+/*
+ * SCLP Support
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ *  Christian Borntraeger <borntraeger at de.ibm.com>
+ *  Heinz Graalfs <graalfs at linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at your
+ * option) any later version.  See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "cpu.h"
+#include "kvm.h"
+#include "memory.h"
+
+#include "sclp.h"
+
+/* Provide information about the configuration, CPUs and storage */
+static void read_SCP_info(SCCB *sccb)
+{
+    ReadInfo *read_info = (ReadInfo *) sccb;
+    int shift = 0;
+
+    while ((ram_size >> (20 + shift)) > 65535) {
+        shift++;
+    }
+    read_info->rnmax = cpu_to_be16(ram_size >> (20 + shift));
+    read_info->rnsize = 1 << shift;
+    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION);
+}
+
+static void sclp_execute(SCCB *sccb, uint64_t code)
+{
+    switch (code) {
+    case SCLP_CMDW_READ_SCP_INFO:
+    case SCLP_CMDW_READ_SCP_INFO_FORCED:
+        read_SCP_info(sccb);
+        break;
+    default:
+        sccb->h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND);
+        break;
+    }
+}
+
+int sclp_service_call(uint32_t sccb, uint64_t code)
+{
+    int r = 0;
+    SCCB work_sccb;
+
+    hwaddr sccb_len = sizeof(SCCB);
+
+    /* first some basic checks on program checks */
+    if (cpu_physical_memory_is_io(sccb)) {
+        r = -PGM_ADDRESSING;
+        goto out;
+    }
+    if (sccb & ~0x7ffffff8ul) {
+        r = -PGM_SPECIFICATION;
+        goto out;
+    }
+
+    /*
+     * we want to work on a private copy of the sccb, to prevent guests
+     * from playing dirty tricks by modifying the memory content after
+     * the host has checked the values
+     */
+    cpu_physical_memory_read(sccb, &work_sccb, sccb_len);
+
+    /* Valid sccb sizes */
+    if (be16_to_cpu(work_sccb.h.length) < sizeof(SCCBHeader) ||
+        be16_to_cpu(work_sccb.h.length) > SCCB_SIZE) {
+        r = -PGM_SPECIFICATION;
+        goto out;
+    }
+
+    sclp_execute((SCCB *)&work_sccb, code);
+
+    cpu_physical_memory_write(sccb, &work_sccb,
+                              be16_to_cpu(work_sccb.h.length));
+
+    sclp_service_interrupt(sccb);
+
+out:
+    return r;
+}
+
+void sclp_service_interrupt(uint32_t sccb)
+{
+    s390_sclp_extint(sccb & ~3);
+}
+
+/* qemu object creation and initialization functions */
+
+static void s390_sclp_device_class_init(ObjectClass *klass, void *data)
+{
+    SysBusDeviceClass *dc = SYS_BUS_DEVICE_CLASS(klass);
+
+    dc->init = s390_sclp_dev_init;
+}
+
+static TypeInfo s390_sclp_device_info = {
+    .name = TYPE_DEVICE_S390_SCLP,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(S390SCLPDevice),
+    .class_init = s390_sclp_device_class_init,
+    .class_size = sizeof(S390SCLPDeviceClass),
+    .abstract = true,
+};
+
+static void s390_sclp_register_types(void)
+{
+    type_register_static(&s390_sclp_device_info);
+}
+
+type_init(s390_sclp_register_types)
diff --git a/hw/s390x/sclp.h b/hw/s390x/sclp.h
new file mode 100644
index 0000000..e9ad42b
--- /dev/null
+++ b/hw/s390x/sclp.h
@@ -0,0 +1,76 @@
+/*
+ * SCLP Support
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ *  Christian Borntraeger <borntraeger at de.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at your
+ * option) any later version.  See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef HW_S390_SCLP_H
+#define HW_S390_SCLP_H
+
+#include <hw/sysbus.h>
+#include <hw/qdev.h>
+
+/* SCLP command codes */
+#define SCLP_CMDW_READ_SCP_INFO                 0x00020001
+#define SCLP_CMDW_READ_SCP_INFO_FORCED          0x00120001
+
+/* SCLP response codes */
+#define SCLP_RC_NORMAL_READ_COMPLETION          0x0010
+#define SCLP_RC_INVALID_SCLP_COMMAND            0x01f0
+
+/* Service Call Control Block (SCCB) and its elements */
+
+#define SCCB_SIZE 4096
+
+/*
+ * Normally packed structures are not the right thing to do, since all code
+ * must take care of endianess. We cant use ldl_phys and friends for two
+ * reasons, though:
+ * - some of the embedded structures below the SCCB can appear multiple times
+ *   at different locations, so there is no fixed offset
+ * - we work on a private copy of the SCCB, since there are several length
+ *   fields, that would cause a security nightmare if we allow the guest to
+ *   alter the structure while we parse it. We cannot use ldl_p and friends
+ *   either without doing pointer arithmetics
+ * So we have to double check that all users of sclp data structures use the
+ * right endianess wrappers.
+ */
+typedef struct SCCBHeader {
+    uint16_t length;
+    uint8_t function_code;
+    uint8_t control_mask[3];
+    uint16_t response_code;
+} QEMU_PACKED SCCBHeader;
+
+#define SCCB_DATA_LEN (SCCB_SIZE - sizeof(SCCBHeader))
+
+typedef struct ReadInfo {
+    SCCBHeader h;
+    uint16_t rnmax;
+    uint8_t rnsize;
+} QEMU_PACKED ReadInfo;
+
+typedef struct SCCB {
+    SCCBHeader h;
+    char data[SCCB_DATA_LEN];
+ } QEMU_PACKED SCCB;
+
+typedef struct S390SCLPDevice {
+    SysBusDevice busdev;
+} S390SCLPDevice;
+
+typedef struct S390SCLPDeviceClass {
+    DeviceClass qdev;
+    int (*init)(S390SCLPDevice *sdev);
+} S390SCLPDeviceClass;
+
+void sclp_service_interrupt(uint32_t sccb);
+
+#endif
diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index 9997765..5be6e83 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -596,17 +596,6 @@ static inline const char *cc_name(int cc_op)
     return cc_names[cc_op];
 }
 
-/* SCLP PV interface defines */
-#define SCLP_CMDW_READ_SCP_INFO         0x00020001
-#define SCLP_CMDW_READ_SCP_INFO_FORCED  0x00120001
-
-#define SCP_LENGTH                      0x00
-#define SCP_FUNCTION_CODE               0x02
-#define SCP_CONTROL_MASK                0x03
-#define SCP_RESPONSE_CODE               0x06
-#define SCP_MEM_CODE                    0x08
-#define SCP_INCREMENT                   0x0a
-
 typedef struct LowCore
 {
     /* prefix area: defined by architecture */
@@ -955,7 +944,7 @@ static inline void ebcdic_put(uint8_t *p, const char *ascii, int len)
 void load_psw(CPUS390XState *env, uint64_t mask, uint64_t addr);
 int mmu_translate(CPUS390XState *env, target_ulong vaddr, int rw, uint64_t asc,
                   target_ulong *raddr, int *flags);
-int sclp_service_call(CPUS390XState *env, uint32_t sccb, uint64_t code);
+int sclp_service_call(uint32_t sccb, uint64_t code);
 uint32_t calc_cc(CPUS390XState *env, uint32_t cc_op, uint64_t src, uint64_t dst,
                  uint64_t vr);
 
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index e19a44d..a66ac43 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -60,9 +60,6 @@
 #define SIGP_STORE_STATUS_ADDR          0x0e
 #define SIGP_SET_ARCH                   0x12
 
-#define SCLP_CMDW_READ_SCP_INFO         0x00020001
-#define SCLP_CMDW_READ_SCP_INFO_FORCED  0x00120001
-
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
     KVM_CAP_LAST_INFO
 };
@@ -344,7 +341,7 @@ static int kvm_sclp_service_call(CPUS390XState *env, struct kvm_run *run,
     sccb = env->regs[ipbh0 & 0xf];
     code = env->regs[(ipbh0 & 0xf0) >> 4];
 
-    r = sclp_service_call(env, sccb, code);
+    r = sclp_service_call(sccb, code);
     if (r < 0) {
         enter_pgmcheck(env, -r);
     }
diff --git a/target-s390x/misc_helper.c b/target-s390x/misc_helper.c
index fdccd58..38d8f2a 100644
--- a/target-s390x/misc_helper.c
+++ b/target-s390x/misc_helper.c
@@ -67,55 +67,12 @@ void program_interrupt(CPUS390XState *env, uint32_t code, int ilc)
     }
 }
 
-/*
- * ret < 0 indicates program check, ret = 0, 1, 2, 3 -> cc
- */
-int sclp_service_call(CPUS390XState *env, uint32_t sccb, uint64_t code)
-{
-    int r = 0;
-    int shift = 0;
-
-#ifdef DEBUG_HELPER
-    printf("sclp(0x%x, 0x%" PRIx64 ")\n", sccb, code);
-#endif
-
-    /* basic checks */
-    if (cpu_physical_memory_is_io(sccb)) {
-        return -PGM_ADDRESSING;
-    }
-    if (sccb & ~0x7ffffff8ul) {
-        return -PGM_SPECIFICATION;
-    }
-
-    switch (code) {
-    case SCLP_CMDW_READ_SCP_INFO:
-    case SCLP_CMDW_READ_SCP_INFO_FORCED:
-        while ((ram_size >> (20 + shift)) > 65535) {
-            shift++;
-        }
-        stw_phys(sccb + SCP_MEM_CODE, ram_size >> (20 + shift));
-        stb_phys(sccb + SCP_INCREMENT, 1 << shift);
-        stw_phys(sccb + SCP_RESPONSE_CODE, 0x10);
-
-        s390_sclp_extint(sccb & ~3);
-        break;
-    default:
-#ifdef DEBUG_HELPER
-        printf("KVM: invalid sclp call 0x%x / 0x%" PRIx64 "x\n", sccb, code);
-#endif
-        r = 3;
-        break;
-    }
-
-    return r;
-}
-
 /* SCLP service call */
 uint32_t HELPER(servc)(CPUS390XState *env, uint32_t r1, uint64_t r2)
 {
     int r;
 
-    r = sclp_service_call(env, r1, r2);
+    r = sclp_service_call(r1, r2);
     if (r < 0) {
         program_interrupt(env, -r, 4);
         return 0;
commit 5b08b344bf581a266071389ceeb445e943e01ba1
Author: Christian Borntraeger <borntraeger at de.ibm.com>
Date:   Mon Oct 29 02:13:21 2012 +0000

    s390: use sync regs for register transfer
    
    Newer kernels provide the guest registers in kvm_run. Lets use
    those if available (i.e. the capability is set). This avoids
    ioctls on cpu_synchronize_state making intercepts faster.
    
    In addition, we have now the prefix register, the access registers
    the control registers up to date. This helps in certain cases,
    e.g. for resolving kernel module addresses with gdb on a guest.
    
    On return, we update the registers according to the level statement,
    i.e. we put all registers for KVM_PUT_FULL_STATE and _RESET_STATE.
    
    Signed-off-by: Christian Borntraeger <borntraeger at de.ibm.com>
    Signed-off-by: Jens Freimann <jfrei at linux.vnet.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index 07edf93..e19a44d 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -67,8 +67,11 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
     KVM_CAP_LAST_INFO
 };
 
+static int cap_sync_regs;
+
 int kvm_arch_init(KVMState *s)
 {
+    cap_sync_regs = kvm_check_extension(s, KVM_CAP_SYNC_REGS);
     return 0;
 }
 
@@ -90,47 +93,116 @@ void kvm_arch_reset_vcpu(CPUS390XState *env)
 
 int kvm_arch_put_registers(CPUS390XState *env, int level)
 {
+    struct kvm_sregs sregs;
     struct kvm_regs regs;
     int ret;
     int i;
 
-    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
-    if (ret < 0) {
-        return ret;
-    }
+    /* always save the PSW  and the GPRS*/
+    env->kvm_run->psw_addr = env->psw.addr;
+    env->kvm_run->psw_mask = env->psw.mask;
 
-    for (i = 0; i < 16; i++) {
-        regs.gprs[i] = env->regs[i];
+    if (cap_sync_regs && env->kvm_run->kvm_valid_regs & KVM_SYNC_GPRS) {
+        for (i = 0; i < 16; i++) {
+            env->kvm_run->s.regs.gprs[i] = env->regs[i];
+            env->kvm_run->kvm_dirty_regs |= KVM_SYNC_GPRS;
+        }
+    } else {
+        for (i = 0; i < 16; i++) {
+            regs.gprs[i] = env->regs[i];
+        }
+        ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
+        if (ret < 0) {
+            return ret;
+        }
     }
 
-    ret = kvm_vcpu_ioctl(env, KVM_SET_REGS, &regs);
-    if (ret < 0) {
-        return ret;
+    /* Do we need to save more than that? */
+    if (level == KVM_PUT_RUNTIME_STATE) {
+        return 0;
     }
 
-    env->kvm_run->psw_addr = env->psw.addr;
-    env->kvm_run->psw_mask = env->psw.mask;
+    if (cap_sync_regs &&
+        env->kvm_run->kvm_valid_regs & KVM_SYNC_ACRS &&
+        env->kvm_run->kvm_valid_regs & KVM_SYNC_CRS) {
+        for (i = 0; i < 16; i++) {
+            env->kvm_run->s.regs.acrs[i] = env->aregs[i];
+            env->kvm_run->s.regs.crs[i] = env->cregs[i];
+        }
+        env->kvm_run->kvm_dirty_regs |= KVM_SYNC_ACRS;
+        env->kvm_run->kvm_dirty_regs |= KVM_SYNC_CRS;
+    } else {
+        for (i = 0; i < 16; i++) {
+            sregs.acrs[i] = env->aregs[i];
+            sregs.crs[i] = env->cregs[i];
+        }
+        ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
+        if (ret < 0) {
+            return ret;
+        }
+    }
 
-    return ret;
+    /* Finally the prefix */
+    if (cap_sync_regs && env->kvm_run->kvm_valid_regs & KVM_SYNC_PREFIX) {
+        env->kvm_run->s.regs.prefix = env->psa;
+        env->kvm_run->kvm_dirty_regs |= KVM_SYNC_PREFIX;
+    } else {
+        /* prefix is only supported via sync regs */
+    }
+    return 0;
 }
 
 int kvm_arch_get_registers(CPUS390XState *env)
 {
-    int ret;
+    struct kvm_sregs sregs;
     struct kvm_regs regs;
+    int ret;
     int i;
 
-    ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
-    if (ret < 0) {
-        return ret;
+    /* get the PSW */
+    env->psw.addr = env->kvm_run->psw_addr;
+    env->psw.mask = env->kvm_run->psw_mask;
+
+    /* the GPRS */
+    if (cap_sync_regs && env->kvm_run->kvm_valid_regs & KVM_SYNC_GPRS) {
+        for (i = 0; i < 16; i++) {
+            env->regs[i] = env->kvm_run->s.regs.gprs[i];
+        }
+    } else {
+        ret = kvm_vcpu_ioctl(env, KVM_GET_REGS, &regs);
+        if (ret < 0) {
+            return ret;
+        }
+         for (i = 0; i < 16; i++) {
+            env->regs[i] = regs.gprs[i];
+        }
     }
 
-    for (i = 0; i < 16; i++) {
-        env->regs[i] = regs.gprs[i];
+    /* The ACRS and CRS */
+    if (cap_sync_regs &&
+        env->kvm_run->kvm_valid_regs & KVM_SYNC_ACRS &&
+        env->kvm_run->kvm_valid_regs & KVM_SYNC_CRS) {
+        for (i = 0; i < 16; i++) {
+            env->aregs[i] = env->kvm_run->s.regs.acrs[i];
+            env->cregs[i] = env->kvm_run->s.regs.crs[i];
+        }
+    } else {
+        ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
+        if (ret < 0) {
+            return ret;
+        }
+         for (i = 0; i < 16; i++) {
+            env->aregs[i] = sregs.acrs[i];
+            env->cregs[i] = sregs.crs[i];
+        }
     }
 
-    env->psw.addr = env->kvm_run->psw_addr;
-    env->psw.mask = env->kvm_run->psw_mask;
+    /* Finally the prefix */
+    if (cap_sync_regs && env->kvm_run->kvm_valid_regs & KVM_SYNC_PREFIX) {
+        env->psa = env->kvm_run->s.regs.prefix;
+    } else {
+        /* no prefix without sync regs */
+    }
 
     return 0;
 }
commit 1b3e6f88dc151578acb6158e22570cf3ee7cbb69
Author: Heinz Graalfs <graalfs at linux.vnet.ibm.com>
Date:   Mon Oct 29 02:13:20 2012 +0000

    s390/kvm_stat: correct sys_perf_event_open syscall number
    
    Correct sys_perf_event_open syscall number for s390 architecture
       - the hardcoded syscall number 298 is for x86 but should
         be different for other architectures.
         In case we figure out via /proc/cpuinfo that we are running
         on s390 the appropriate syscall number is used from map
         syscall_numbers; other architectures can extend this.
    
    Signed-off-by: Heinz Graalfs <graalfs at linux.vnet.ibm.com>
    Signed-off-by: Jens Freimann <jfrei at linux.vnet.ibm.com>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat
index e8d68f0..762544b 100755
--- a/scripts/kvm/kvm_stat
+++ b/scripts/kvm/kvm_stat
@@ -170,6 +170,12 @@ vendor_exit_reasons = {
     'IBM/S390': s390_exit_reasons,
 }
 
+syscall_numbers = {
+    'IBM/S390': 331,
+}
+
+sc_perf_evt_open = 298
+
 exit_reasons = None
 
 for line in file('/proc/cpuinfo').readlines():
@@ -177,7 +183,8 @@ for line in file('/proc/cpuinfo').readlines():
         for flag in line.split():
             if flag in vendor_exit_reasons:
                 exit_reasons = vendor_exit_reasons[flag]
-
+            if flag in syscall_numbers:
+                sc_perf_evt_open = syscall_numbers[flag]
 filters = {
     'kvm_exit': ('exit_reason', exit_reasons)
 }
@@ -206,7 +213,7 @@ class perf_event_attr(ctypes.Structure):
                 ('bp_len', ctypes.c_uint64),
                 ]
 def _perf_event_open(attr, pid, cpu, group_fd, flags):
-    return syscall(298, ctypes.pointer(attr), ctypes.c_int(pid),
+    return syscall(sc_perf_evt_open, ctypes.pointer(attr), ctypes.c_int(pid),
                    ctypes.c_int(cpu), ctypes.c_int(group_fd),
                    ctypes.c_long(flags))
 
commit 235a3f0bed3584fe65079ffa07c7a842971f261e
Author: Alexander Graf <agraf at suse.de>
Date:   Wed Sep 19 17:24:46 2012 +0200

    s390x: fix -initrd in virtio machine
    
    When using -initrd in the virtio machine, we need to indicate the initrd
    start and size inside the kernel image. These parameters need to be stored
    in native endianness.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>
    Acked-by: Richard Henderson <rth at twiddle.net>
    Acked-by: Christian Borntraeger <borntraeger at de.ibm.com>

diff --git a/hw/s390-virtio.c b/hw/s390-virtio.c
index 85bd13e..52fad37 100644
--- a/hw/s390-virtio.c
+++ b/hw/s390-virtio.c
@@ -285,8 +285,8 @@ static void s390_init(QEMUMachineInitArgs *args)
         }
 
         /* we have to overwrite values in the kernel image, which are "rom" */
-        memcpy(rom_ptr(INITRD_PARM_START), &initrd_offset, 8);
-        memcpy(rom_ptr(INITRD_PARM_SIZE), &initrd_size, 8);
+        stq_p(rom_ptr(INITRD_PARM_START), initrd_offset);
+        stq_p(rom_ptr(INITRD_PARM_SIZE), initrd_size);
     }
 
     if (rom_ptr(KERN_PARM_AREA)) {
commit 523a59f596a3e62f5a28eb171adba35e71310040
Author: Avi Kivity <avi at redhat.com>
Date:   Thu Oct 25 12:37:57 2012 +0200

    pci: avoid destroying bridge address space windows in a transaction
    
    Calling memory_region_destroy() in a transaction is illegal (and aborts),
    as until the transaction is committed, the region remains live.
    
    Fix by moving destruction until after the transaction commits.  This requires
    having an extra set of regions, so the new and old regions can coexist.
    
    Signed-off-by: Avi Kivity <avi at redhat.com>
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/pci_bridge.c b/hw/pci_bridge.c
index 5c6455f..4680501 100644
--- a/hw/pci_bridge.c
+++ b/hw/pci_bridge.c
@@ -151,58 +151,63 @@ static void pci_bridge_init_alias(PCIBridge *bridge, MemoryRegion *alias,
     memory_region_add_subregion_overlap(parent_space, base, alias, 1);
 }
 
-static void pci_bridge_cleanup_alias(MemoryRegion *alias,
-                                     MemoryRegion *parent_space)
-{
-    memory_region_del_subregion(parent_space, alias);
-    memory_region_destroy(alias);
-}
-
-static void pci_bridge_region_init(PCIBridge *br)
+static PCIBridgeWindows *pci_bridge_region_init(PCIBridge *br)
 {
     PCIBus *parent = br->dev.bus;
+    PCIBridgeWindows *w = g_new(PCIBridgeWindows, 1);
     uint16_t cmd = pci_get_word(br->dev.config + PCI_COMMAND);
 
-    pci_bridge_init_alias(br, &br->alias_pref_mem,
+    pci_bridge_init_alias(br, &w->alias_pref_mem,
                           PCI_BASE_ADDRESS_MEM_PREFETCH,
                           "pci_bridge_pref_mem",
                           &br->address_space_mem,
                           parent->address_space_mem,
                           cmd & PCI_COMMAND_MEMORY);
-    pci_bridge_init_alias(br, &br->alias_mem,
+    pci_bridge_init_alias(br, &w->alias_mem,
                           PCI_BASE_ADDRESS_SPACE_MEMORY,
                           "pci_bridge_mem",
                           &br->address_space_mem,
                           parent->address_space_mem,
                           cmd & PCI_COMMAND_MEMORY);
-    pci_bridge_init_alias(br, &br->alias_io,
+    pci_bridge_init_alias(br, &w->alias_io,
                           PCI_BASE_ADDRESS_SPACE_IO,
                           "pci_bridge_io",
                           &br->address_space_io,
                           parent->address_space_io,
                           cmd & PCI_COMMAND_IO);
    /* TODO: optinal VGA and VGA palette snooping support. */
+
+    return w;
 }
 
-static void pci_bridge_region_cleanup(PCIBridge *br)
+static void pci_bridge_region_del(PCIBridge *br, PCIBridgeWindows *w)
 {
     PCIBus *parent = br->dev.bus;
-    pci_bridge_cleanup_alias(&br->alias_io,
-                             parent->address_space_io);
-    pci_bridge_cleanup_alias(&br->alias_mem,
-                             parent->address_space_mem);
-    pci_bridge_cleanup_alias(&br->alias_pref_mem,
-                             parent->address_space_mem);
+
+    memory_region_del_subregion(parent->address_space_io, &w->alias_io);
+    memory_region_del_subregion(parent->address_space_mem, &w->alias_mem);
+    memory_region_del_subregion(parent->address_space_mem, &w->alias_pref_mem);
+}
+
+static void pci_bridge_region_cleanup(PCIBridge *br, PCIBridgeWindows *w)
+{
+    memory_region_destroy(&w->alias_io);
+    memory_region_destroy(&w->alias_mem);
+    memory_region_destroy(&w->alias_pref_mem);
+    g_free(w);
 }
 
 static void pci_bridge_update_mappings(PCIBridge *br)
 {
+    PCIBridgeWindows *w = br->windows;
+
     /* Make updates atomic to: handle the case of one VCPU updating the bridge
      * while another accesses an unaffected region. */
     memory_region_transaction_begin();
-    pci_bridge_region_cleanup(br);
-    pci_bridge_region_init(br);
+    pci_bridge_region_del(br, br->windows);
+    br->windows = pci_bridge_region_init(br);
     memory_region_transaction_commit();
+    pci_bridge_region_cleanup(br, w);
 }
 
 /* default write_config function for PCI-to-PCI bridge */
@@ -326,7 +331,7 @@ int pci_bridge_initfn(PCIDevice *dev)
     memory_region_init(&br->address_space_mem, "pci_bridge_pci", INT64_MAX);
     sec_bus->address_space_io = &br->address_space_io;
     memory_region_init(&br->address_space_io, "pci_bridge_io", 65536);
-    pci_bridge_region_init(br);
+    br->windows = pci_bridge_region_init(br);
     QLIST_INIT(&sec_bus->child);
     QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
     return 0;
@@ -338,7 +343,8 @@ void pci_bridge_exitfn(PCIDevice *pci_dev)
     PCIBridge *s = DO_UPCAST(PCIBridge, dev, pci_dev);
     assert(QLIST_EMPTY(&s->sec_bus.child));
     QLIST_REMOVE(&s->sec_bus, sibling);
-    pci_bridge_region_cleanup(s);
+    pci_bridge_region_del(s, s->windows);
+    pci_bridge_region_cleanup(s, s->windows);
     memory_region_destroy(&s->address_space_mem);
     memory_region_destroy(&s->address_space_io);
     /* qbus_free() is called automatically by qdev_free() */
diff --git a/hw/pci_internals.h b/hw/pci_internals.h
index c931b64..21d0ce6 100644
--- a/hw/pci_internals.h
+++ b/hw/pci_internals.h
@@ -40,6 +40,19 @@ struct PCIBus {
     int *irq_count;
 };
 
+typedef struct PCIBridgeWindows PCIBridgeWindows;
+
+/*
+ * Aliases for each of the address space windows that the bridge
+ * can forward. Mapped into the bridge's parent's address space,
+ * as subregions.
+ */
+struct PCIBridgeWindows {
+    MemoryRegion alias_pref_mem;
+    MemoryRegion alias_mem;
+    MemoryRegion alias_io;
+};
+
 struct PCIBridge {
     PCIDevice dev;
 
@@ -55,14 +68,9 @@ struct PCIBridge {
      */
     MemoryRegion address_space_mem;
     MemoryRegion address_space_io;
-    /*
-     * Aliases for each of the address space windows that the bridge
-     * can forward. Mapped into the bridge's parent's address space,
-     * as subregions.
-     */
-    MemoryRegion alias_pref_mem;
-    MemoryRegion alias_mem;
-    MemoryRegion alias_io;
+
+    PCIBridgeWindows *windows;
+
     pci_map_irq_fn map_irq;
     const char *bus_name;
 };
commit ff3a8066e651230d255e6eea340e2d48e7da4aeb
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Mon Sep 24 21:05:03 2012 +0200

    virtio-net: enable mrg buf header in tap on linux
    
    Modern linux supports arbitrary header size,
    which makes it possible to pass mrg buf header
    to tap directly without iovec mangling.
    Use this capability when it is there.
    
    This removes the need to deal with it in
    vhost-net as we do now.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/vhost_net.c b/hw/vhost_net.c
index df2c4a3..8241601 100644
--- a/hw/vhost_net.c
+++ b/hw/vhost_net.c
@@ -150,10 +150,6 @@ int vhost_net_start(struct vhost_net *net,
     if (r < 0) {
         goto fail_notifiers;
     }
-    if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
-        tap_set_vnet_hdr_len(net->nc,
-                             sizeof(struct virtio_net_hdr_mrg_rxbuf));
-    }
 
     r = vhost_dev_start(&net->dev, dev);
     if (r < 0) {
@@ -179,9 +175,6 @@ fail:
     }
     net->nc->info->poll(net->nc, true);
     vhost_dev_stop(&net->dev, dev);
-    if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
-        tap_set_vnet_hdr_len(net->nc, sizeof(struct virtio_net_hdr));
-    }
 fail_start:
     vhost_dev_disable_notifiers(&net->dev, dev);
 fail_notifiers:
@@ -199,18 +192,12 @@ void vhost_net_stop(struct vhost_net *net,
     }
     net->nc->info->poll(net->nc, true);
     vhost_dev_stop(&net->dev, dev);
-    if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
-        tap_set_vnet_hdr_len(net->nc, sizeof(struct virtio_net_hdr));
-    }
     vhost_dev_disable_notifiers(&net->dev, dev);
 }
 
 void vhost_net_cleanup(struct vhost_net *net)
 {
     vhost_dev_cleanup(&net->dev);
-    if (net->dev.acked_features & (1 << VIRTIO_NET_F_MRG_RXBUF)) {
-        tap_set_vnet_hdr_len(net->nc, sizeof(struct virtio_net_hdr));
-    }
     g_free(net);
 }
 #else
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 1180b51..108ce07 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -228,6 +228,20 @@ static int peer_has_ufo(VirtIONet *n)
     return n->has_ufo;
 }
 
+static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs)
+{
+    n->mergeable_rx_bufs = mergeable_rx_bufs;
+
+    n->guest_hdr_len = n->mergeable_rx_bufs ?
+        sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);
+
+    if (peer_has_vnet_hdr(n) &&
+        tap_has_vnet_hdr_len(n->nic->nc.peer, n->guest_hdr_len)) {
+        tap_set_vnet_hdr_len(n->nic->nc.peer, n->guest_hdr_len);
+        n->host_hdr_len = n->guest_hdr_len;
+    }
+}
+
 static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
 {
     VirtIONet *n = to_virtio_net(vdev);
@@ -280,9 +294,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
 {
     VirtIONet *n = to_virtio_net(vdev);
 
-    n->mergeable_rx_bufs = !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF));
-    n->guest_hdr_len = n->mergeable_rx_bufs ?
-        sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);
+    virtio_net_set_mrg_rx_bufs(n, !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF)));
 
     if (n->has_vnet_hdr) {
         tap_set_offload(n->nic->nc.peer,
@@ -898,9 +910,8 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
 
     qemu_get_buffer(f, n->mac, ETH_ALEN);
     n->tx_waiting = qemu_get_be32(f);
-    n->mergeable_rx_bufs = qemu_get_be32(f);
-    n->guest_hdr_len = n->mergeable_rx_bufs ?
-        sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);
+
+    virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f));
 
     if (version_id >= 3)
         n->status = qemu_get_be16(f);
@@ -1050,8 +1061,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
 
     n->tx_waiting = 0;
     n->tx_burst = net->txburst;
-    n->mergeable_rx_bufs = 0;
-    n->guest_hdr_len = sizeof(struct virtio_net_hdr);
+    virtio_net_set_mrg_rx_bufs(n, 0);
     n->promisc = 1; /* for compatibility */
 
     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
commit 6e371ab867d48c16e6a9ee32cefcea48692a4deb
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Mon Sep 24 17:04:21 2012 +0200

    virtio-net: test peer header support at init time
    
    There's no reason to query header support at random
    times: at load or feature query.
    Driver also might not query functions.
    Cleaner to do it at device init.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index c94521e..1180b51 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -202,16 +202,19 @@ static void virtio_net_reset(VirtIODevice *vdev)
     memset(n->vlans, 0, MAX_VLAN >> 3);
 }
 
-static int peer_has_vnet_hdr(VirtIONet *n)
+static void peer_test_vnet_hdr(VirtIONet *n)
 {
     if (!n->nic->nc.peer)
-        return 0;
+        return;
 
     if (n->nic->nc.peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP)
-        return 0;
+        return;
 
     n->has_vnet_hdr = tap_has_vnet_hdr(n->nic->nc.peer);
+}
 
+static int peer_has_vnet_hdr(VirtIONet *n)
+{
     return n->has_vnet_hdr;
 }
 
@@ -231,10 +234,7 @@ static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
 
     features |= (1 << VIRTIO_NET_F_MAC);
 
-    if (peer_has_vnet_hdr(n)) {
-        tap_using_vnet_hdr(n->nic->nc.peer, 1);
-        n->host_hdr_len = sizeof(struct virtio_net_hdr);
-    } else {
+    if (!peer_has_vnet_hdr(n)) {
         features &= ~(0x1 << VIRTIO_NET_F_CSUM);
         features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO4);
         features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO6);
@@ -940,8 +940,6 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
         }
 
         if (n->has_vnet_hdr) {
-            tap_using_vnet_hdr(n->nic->nc.peer, 1);
-            n->host_hdr_len = sizeof(struct virtio_net_hdr);
             tap_set_offload(n->nic->nc.peer,
                     (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                     (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
@@ -1040,6 +1038,13 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
     n->status = VIRTIO_NET_S_LINK_UP;
 
     n->nic = qemu_new_nic(&net_virtio_info, conf, object_get_typename(OBJECT(dev)), dev->id, n);
+    peer_test_vnet_hdr(n);
+    if (peer_has_vnet_hdr(n)) {
+        tap_using_vnet_hdr(n->nic->nc.peer, 1);
+        n->host_hdr_len = sizeof(struct virtio_net_hdr);
+    } else {
+        n->host_hdr_len = 0;
+    }
 
     qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a);
 
commit e043ebc6f9a093c1fd1b677191ad9fbeefe22d1e
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Mon Sep 24 16:27:27 2012 +0200

    virtio-net: minor code simplification
    
    During packet filtering, we can now use host hdr len
    to offset incoming buffer unconditionally.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index dc4a26c..c94521e 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -544,9 +544,7 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
     if (n->promisc)
         return 1;
 
-    if (n->has_vnet_hdr) {
-        ptr += sizeof(struct virtio_net_hdr);
-    }
+    ptr += n->host_hdr_len;
 
     if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
         int vid = be16_to_cpup((uint16_t *)(ptr + 14)) & 0xfff;
commit 7b80d08efc36fd6c7881e98302f00148b5fd908a
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Mon Sep 24 14:54:44 2012 +0200

    virtio-net: simplify rx code
    
    Remove code duplication using guest header length that we track.
    Drop specific layout requirement for rx buffers: things work
    using generic iovec functions in any case.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 5206648..dc4a26c 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -720,12 +720,7 @@ static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
         struct iovec *out_sg = &elem.out_sg[0];
         struct iovec sg[VIRTQUEUE_MAX_SIZE];
 
-        /* hdr_len refers to the header received from the guest */
-        hdr_len = n->mergeable_rx_bufs ?
-            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
-            sizeof(struct virtio_net_hdr);
-
-        if (out_num < 1 || out_sg->iov_len != hdr_len) {
+        if (out_num < 1) {
             error_report("virtio-net header not in first element");
             exit(1);
         }
@@ -747,7 +742,7 @@ static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
             out_sg = sg;
         }
 
-        len = hdr_len;
+        len = n->guest_hdr_len;
 
         ret = qemu_sendv_packet_async(&n->nic->nc, out_sg, out_num,
                                       virtio_net_tx_complete);
commit 14761f9cf7fbc6d058c1e51c313a139066eab256
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Mon Sep 24 14:52:28 2012 +0200

    virtio-net: switch tx to safe iov functions
    
    Avoid mangling iovec manually: use safe iov_*
    functions.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 59ab674..5206648 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -715,10 +715,10 @@ static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
     }
 
     while (virtqueue_pop(vq, &elem)) {
-        ssize_t ret, len = 0;
+        ssize_t ret, len;
         unsigned int out_num = elem.out_num;
         struct iovec *out_sg = &elem.out_sg[0];
-        unsigned hdr_len;
+        struct iovec sg[VIRTQUEUE_MAX_SIZE];
 
         /* hdr_len refers to the header received from the guest */
         hdr_len = n->mergeable_rx_bufs ?
@@ -730,18 +730,25 @@ static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
             exit(1);
         }
 
-        /* ignore the header if GSO is not supported */
-        if (!n->has_vnet_hdr) {
-            out_num--;
-            out_sg++;
-            len += hdr_len;
-        } else if (n->mergeable_rx_bufs) {
-            /* tapfd expects a struct virtio_net_hdr */
-            hdr_len -= sizeof(struct virtio_net_hdr);
-            out_sg->iov_len -= hdr_len;
-            len += hdr_len;
+        /*
+         * If host wants to see the guest header as is, we can
+         * pass it on unchanged. Otherwise, copy just the parts
+         * that host is interested in.
+         */
+        assert(n->host_hdr_len <= n->guest_hdr_len);
+        if (n->host_hdr_len != n->guest_hdr_len) {
+            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
+                                       out_sg, out_num,
+                                       0, n->host_hdr_len);
+            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
+                             out_sg, out_num,
+                             n->guest_hdr_len, -1);
+            out_num = sg_num;
+            out_sg = sg;
         }
 
+        len = hdr_len;
+
         ret = qemu_sendv_packet_async(&n->nic->nc, out_sg, out_num,
                                       virtio_net_tx_complete);
         if (ret == 0) {
commit c8d28e7e336869524d166d88f08ad476eadedccb
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Mon Sep 24 13:26:55 2012 +0200

    virtio-net: first s/g is always at start of buf
    
    We know offset is 0, assert that.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 5128a2a..59ab674 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -631,6 +631,7 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
         }
 
         if (i == 0) {
+            assert(offset == 0);
             if (n->mergeable_rx_bufs) {
                 mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                     sg, elem.in_num,
@@ -638,8 +639,8 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
                                     sizeof(mhdr.num_buffers));
             }
 
-            receive_header(n, sg, elem.in_num, buf + offset, size - offset);
-            offset += n->host_hdr_len;
+            receive_header(n, sg, elem.in_num, buf, size);
+            offset = n->host_hdr_len;
             total += n->guest_hdr_len;
             guest_offset = n->guest_hdr_len;
         } else {
commit 280598b7a5fdf96fb79d87a2129750bad5dbf24b
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Mon Sep 24 13:24:17 2012 +0200

    virtio-net: refactor receive_hdr
    
    Now that we know host hdr length, we don't need to
    duplicate the logic in receive_hdr: caller can
    figure out the offset itself.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index d315fdf..5128a2a 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -516,17 +516,15 @@ static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
     }
 }
 
-static int receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
-                          const void *buf, size_t size)
+static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
+                           const void *buf, size_t size)
 {
-    int offset = 0;
-
     if (n->has_vnet_hdr) {
         /* FIXME this cast is evil */
         void *wbuf = (void *)buf;
-        work_around_broken_dhclient(wbuf, wbuf + offset, size - offset);
-        offset = sizeof(struct virtio_net_hdr);
-        iov_from_buf(iov, iov_cnt, 0, buf, offset);
+        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
+                                    size - n->host_hdr_len);
+        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
     } else {
         struct virtio_net_hdr hdr = {
             .flags = 0,
@@ -534,8 +532,6 @@ static int receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
         };
         iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
     }
-
-    return offset;
 }
 
 static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
@@ -642,8 +638,8 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
                                     sizeof(mhdr.num_buffers));
             }
 
-            offset += receive_header(n, sg, elem.in_num,
-                                     buf + offset, size - offset);
+            receive_header(n, sg, elem.in_num, buf + offset, size - offset);
+            offset += n->host_hdr_len;
             total += n->guest_hdr_len;
             guest_offset = n->guest_hdr_len;
         } else {
commit 63c5872873de8d7d994a589eed7bfe6a70cc8e06
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Mon Sep 24 13:17:13 2012 +0200

    virtio-net: use safe iov operations for rx
    
    Avoid magling iov manually: use safe iov operations
    for processing packets incoming to guest.
    This also removes the requirement for virtio header to
    fit the first s/g entry exactly.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 8461586..d315fdf 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -594,8 +594,9 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
 static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
-    struct virtio_net_hdr_mrg_rxbuf *mhdr = NULL;
-    const struct iovec *sg = elem.in_sg;
+    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
+    struct virtio_net_hdr_mrg_rxbuf mhdr;
+    unsigned mhdr_cnt = 0;
     size_t offset, i, guest_offset;
 
     if (!virtio_net_can_receive(&n->nic->nc))
@@ -633,14 +634,13 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
             exit(1);
         }
 
-        if (!n->mergeable_rx_bufs && elem.in_sg[0].iov_len != n->guest_hdr_len) {
-            error_report("virtio-net header not in first element");
-            exit(1);
-        }
-
         if (i == 0) {
-            if (n->mergeable_rx_bufs)
-                mhdr = (struct virtio_net_hdr_mrg_rxbuf *)sg[0].iov_base;
+            if (n->mergeable_rx_bufs) {
+                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
+                                    sg, elem.in_num,
+                                    offsetof(typeof(mhdr), num_buffers),
+                                    sizeof(mhdr.num_buffers));
+            }
 
             offset += receive_header(n, sg, elem.in_num,
                                      buf + offset, size - offset);
@@ -673,8 +673,11 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
         virtqueue_fill(n->rx_vq, &elem, total, i++);
     }
 
-    if (mhdr) {
-        stw_p(&mhdr->num_buffers, i);
+    if (mhdr_cnt) {
+        stw_p(&mhdr.num_buffers, i);
+        iov_from_buf(mhdr_sg, mhdr_cnt,
+                     0,
+                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
     }
 
     virtqueue_flush(n->rx_vq, i);
commit 22cc84db6e42bef8646b8cd671f4c999e9c0a38f
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Mon Sep 24 13:14:16 2012 +0200

    virtio-net: avoid sg copy
    
    Avoid tweaking iovec during receive. This removes
    the need to copy the vector.
    Note: we currently have an evil cast in work_around_broken_dhclient
    and unfortunately this patch does not fix it - just
    pushes the evil cast to another place.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index a0c79e1..8461586 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -504,40 +504,37 @@ static int virtio_net_has_buffers(VirtIONet *n, int bufsize)
  * cache.
  */
 static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
-                                        const uint8_t *buf, size_t size)
+                                        uint8_t *buf, size_t size)
 {
     if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
         (size > 27 && size < 1500) && /* normal sized MTU */
         (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
         (buf[23] == 17) && /* ip.protocol == UDP */
         (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
-        /* FIXME this cast is evil */
-        net_checksum_calculate((uint8_t *)buf, size);
+        net_checksum_calculate(buf, size);
         hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
     }
 }
 
-static int receive_header(VirtIONet *n, struct iovec *iov, int iovcnt,
-                          const void *buf, size_t size, size_t hdr_len)
+static int receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
+                          const void *buf, size_t size)
 {
-    struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)iov[0].iov_base;
     int offset = 0;
 
-    hdr->flags = 0;
-    hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
-
     if (n->has_vnet_hdr) {
-        memcpy(hdr, buf, sizeof(*hdr));
-        offset = sizeof(*hdr);
-        work_around_broken_dhclient(hdr, buf + offset, size - offset);
+        /* FIXME this cast is evil */
+        void *wbuf = (void *)buf;
+        work_around_broken_dhclient(wbuf, wbuf + offset, size - offset);
+        offset = sizeof(struct virtio_net_hdr);
+        iov_from_buf(iov, iov_cnt, 0, buf, offset);
+    } else {
+        struct virtio_net_hdr hdr = {
+            .flags = 0,
+            .gso_type = VIRTIO_NET_HDR_GSO_NONE
+        };
+        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
     }
 
-    /* We only ever receive a struct virtio_net_hdr from the tapfd,
-     * but we may be passing along a larger header to the guest.
-     */
-    iov[0].iov_base += hdr_len;
-    iov[0].iov_len  -= hdr_len;
-
     return offset;
 }
 
@@ -598,7 +595,8 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
 {
     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
     struct virtio_net_hdr_mrg_rxbuf *mhdr = NULL;
-    size_t offset, i;
+    const struct iovec *sg = elem.in_sg;
+    size_t offset, i, guest_offset;
 
     if (!virtio_net_can_receive(&n->nic->nc))
         return -1;
@@ -615,7 +613,7 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
     while (offset < size) {
         VirtQueueElement elem;
         int len, total;
-        struct iovec sg[VIRTQUEUE_MAX_SIZE];
+        const struct iovec *sg = elem.in_sg;
 
         total = 0;
 
@@ -640,20 +638,20 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
             exit(1);
         }
 
-        memcpy(&sg, &elem.in_sg[0], sizeof(sg[0]) * elem.in_num);
-
         if (i == 0) {
             if (n->mergeable_rx_bufs)
                 mhdr = (struct virtio_net_hdr_mrg_rxbuf *)sg[0].iov_base;
 
             offset += receive_header(n, sg, elem.in_num,
-                                     buf + offset, size - offset,
-                                     n->guest_hdr_len);
+                                     buf + offset, size - offset);
             total += n->guest_hdr_len;
+            guest_offset = n->guest_hdr_len;
+        } else {
+            guest_offset = 0;
         }
 
         /* copy in packet.  ugh */
-        len = iov_from_buf(sg, elem.in_num, 0,
+        len = iov_from_buf(sg, elem.in_num, guest_offset,
                            buf + offset, size - offset);
         total += len;
         offset += len;
commit d336336c8164859e4527cbb9f3df189f8bb406de
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Mon Sep 24 13:02:52 2012 +0200

    iov: add iov_cpy
    
    Add API to copy part of iovec safely.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/iov.c b/iov.c
index c6a66f0..b7378bf 100644
--- a/iov.c
+++ b/iov.c
@@ -228,3 +228,26 @@ void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
         fprintf(fp, "\n");
     }
 }
+
+unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt,
+                 const struct iovec *iov, unsigned int iov_cnt,
+                 size_t offset, size_t bytes)
+{
+    size_t len;
+    unsigned int i, j;
+    for (i = 0, j = 0; i < iov_cnt && j < dst_iov_cnt && bytes; i++) {
+        if (offset >= iov[i].iov_len) {
+            offset -= iov[i].iov_len;
+            continue;
+        }
+        len = MIN(bytes, iov[i].iov_len - offset);
+
+        dst_iov[j].iov_base = iov[i].iov_base + offset;
+        dst_iov[j].iov_len = len;
+        j++;
+        bytes -= len;
+        offset = 0;
+    }
+    assert(offset == 0);
+    return j;
+}
diff --git a/iov.h b/iov.h
index a73569f..34c8ec9 100644
--- a/iov.h
+++ b/iov.h
@@ -86,3 +86,12 @@ ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt,
  */
 void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
                  FILE *fp, const char *prefix, size_t limit);
+
+/*
+ * Partial copy of vector from iov to dst_iov (data is not copied).
+ * dst_iov overlaps iov at a specified offset.
+ * size of dst_iov is at most bytes. dst vector count is returned.
+ */
+unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt,
+                 const struct iovec *iov, unsigned int iov_cnt,
+                 size_t offset, size_t bytes);
commit e35e23f655fb120a2b4d0695bdee86fafd9caabf
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Mon Sep 24 12:12:25 2012 +0200

    virtio-net: track host/guest header length
    
    Tracking these in device state instead of
    re-calculating on each packet. No functional
    changes.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 50ba728..a0c79e1 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -41,6 +41,8 @@ typedef struct VirtIONet
     int32_t tx_burst;
     int tx_waiting;
     uint32_t has_vnet_hdr;
+    size_t host_hdr_len;
+    size_t guest_hdr_len;
     uint8_t has_ufo;
     struct {
         VirtQueueElement elem;
@@ -231,6 +233,7 @@ static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
 
     if (peer_has_vnet_hdr(n)) {
         tap_using_vnet_hdr(n->nic->nc.peer, 1);
+        n->host_hdr_len = sizeof(struct virtio_net_hdr);
     } else {
         features &= ~(0x1 << VIRTIO_NET_F_CSUM);
         features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO4);
@@ -278,6 +281,8 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
     VirtIONet *n = to_virtio_net(vdev);
 
     n->mergeable_rx_bufs = !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF));
+    n->guest_hdr_len = n->mergeable_rx_bufs ?
+        sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);
 
     if (n->has_vnet_hdr) {
         tap_set_offload(n->nic->nc.peer,
@@ -593,18 +598,13 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
 {
     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
     struct virtio_net_hdr_mrg_rxbuf *mhdr = NULL;
-    size_t guest_hdr_len, offset, i, host_hdr_len;
+    size_t offset, i;
 
     if (!virtio_net_can_receive(&n->nic->nc))
         return -1;
 
     /* hdr_len refers to the header we supply to the guest */
-    guest_hdr_len = n->mergeable_rx_bufs ?
-        sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);
-
-
-    host_hdr_len = n->has_vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
-    if (!virtio_net_has_buffers(n, size + guest_hdr_len - host_hdr_len))
+    if (!virtio_net_has_buffers(n, size + n->guest_hdr_len - n->host_hdr_len))
         return 0;
 
     if (!receive_filter(n, buf, size))
@@ -626,7 +626,7 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
                     "i %zd mergeable %d offset %zd, size %zd, "
                     "guest hdr len %zd, host hdr len %zd guest features 0x%x",
                     i, n->mergeable_rx_bufs, offset, size,
-                    guest_hdr_len, host_hdr_len, n->vdev.guest_features);
+                    n->guest_hdr_len, n->host_hdr_len, n->vdev.guest_features);
             exit(1);
         }
 
@@ -635,7 +635,7 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
             exit(1);
         }
 
-        if (!n->mergeable_rx_bufs && elem.in_sg[0].iov_len != guest_hdr_len) {
+        if (!n->mergeable_rx_bufs && elem.in_sg[0].iov_len != n->guest_hdr_len) {
             error_report("virtio-net header not in first element");
             exit(1);
         }
@@ -647,8 +647,9 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
                 mhdr = (struct virtio_net_hdr_mrg_rxbuf *)sg[0].iov_base;
 
             offset += receive_header(n, sg, elem.in_num,
-                                     buf + offset, size - offset, guest_hdr_len);
-            total += guest_hdr_len;
+                                     buf + offset, size - offset,
+                                     n->guest_hdr_len);
+            total += n->guest_hdr_len;
         }
 
         /* copy in packet.  ugh */
@@ -665,7 +666,7 @@ static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t
                          "i %zd mergeable %d offset %zd, size %zd, "
                          "guest hdr len %zd, host hdr len %zd",
                          i, n->mergeable_rx_bufs,
-                         offset, size, guest_hdr_len, host_hdr_len);
+                         offset, size, n->guest_hdr_len, n->host_hdr_len);
 #endif
             return size;
         }
@@ -900,6 +901,8 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
     qemu_get_buffer(f, n->mac, ETH_ALEN);
     n->tx_waiting = qemu_get_be32(f);
     n->mergeable_rx_bufs = qemu_get_be32(f);
+    n->guest_hdr_len = n->mergeable_rx_bufs ?
+        sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);
 
     if (version_id >= 3)
         n->status = qemu_get_be16(f);
@@ -940,6 +943,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
 
         if (n->has_vnet_hdr) {
             tap_using_vnet_hdr(n->nic->nc.peer, 1);
+            n->host_hdr_len = sizeof(struct virtio_net_hdr);
             tap_set_offload(n->nic->nc.peer,
                     (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                     (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
@@ -1044,6 +1048,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
     n->tx_waiting = 0;
     n->tx_burst = net->txburst;
     n->mergeable_rx_bufs = 0;
+    n->guest_hdr_len = sizeof(struct virtio_net_hdr);
     n->promisc = 1; /* for compatibility */
 
     n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
commit b308c82cbda44e138ef990af64d44a5613c16092
Author: Avi Kivity <avi at redhat.com>
Date:   Thu Oct 25 12:37:57 2012 +0200

    pci: avoid destroying bridge address space windows in a transaction
    
    Calling memory_region_destroy() in a transaction is illegal (and aborts),
    as until the transaction is committed, the region remains live.
    
    Fix by moving destruction until after the transaction commits.  This requires
    having an extra set of regions, so the new and old regions can coexist.
    
    Signed-off-by: Avi Kivity <avi at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/hw/pci_bridge.c b/hw/pci_bridge.c
index 5c6455f..4680501 100644
--- a/hw/pci_bridge.c
+++ b/hw/pci_bridge.c
@@ -151,58 +151,63 @@ static void pci_bridge_init_alias(PCIBridge *bridge, MemoryRegion *alias,
     memory_region_add_subregion_overlap(parent_space, base, alias, 1);
 }
 
-static void pci_bridge_cleanup_alias(MemoryRegion *alias,
-                                     MemoryRegion *parent_space)
-{
-    memory_region_del_subregion(parent_space, alias);
-    memory_region_destroy(alias);
-}
-
-static void pci_bridge_region_init(PCIBridge *br)
+static PCIBridgeWindows *pci_bridge_region_init(PCIBridge *br)
 {
     PCIBus *parent = br->dev.bus;
+    PCIBridgeWindows *w = g_new(PCIBridgeWindows, 1);
     uint16_t cmd = pci_get_word(br->dev.config + PCI_COMMAND);
 
-    pci_bridge_init_alias(br, &br->alias_pref_mem,
+    pci_bridge_init_alias(br, &w->alias_pref_mem,
                           PCI_BASE_ADDRESS_MEM_PREFETCH,
                           "pci_bridge_pref_mem",
                           &br->address_space_mem,
                           parent->address_space_mem,
                           cmd & PCI_COMMAND_MEMORY);
-    pci_bridge_init_alias(br, &br->alias_mem,
+    pci_bridge_init_alias(br, &w->alias_mem,
                           PCI_BASE_ADDRESS_SPACE_MEMORY,
                           "pci_bridge_mem",
                           &br->address_space_mem,
                           parent->address_space_mem,
                           cmd & PCI_COMMAND_MEMORY);
-    pci_bridge_init_alias(br, &br->alias_io,
+    pci_bridge_init_alias(br, &w->alias_io,
                           PCI_BASE_ADDRESS_SPACE_IO,
                           "pci_bridge_io",
                           &br->address_space_io,
                           parent->address_space_io,
                           cmd & PCI_COMMAND_IO);
    /* TODO: optinal VGA and VGA palette snooping support. */
+
+    return w;
 }
 
-static void pci_bridge_region_cleanup(PCIBridge *br)
+static void pci_bridge_region_del(PCIBridge *br, PCIBridgeWindows *w)
 {
     PCIBus *parent = br->dev.bus;
-    pci_bridge_cleanup_alias(&br->alias_io,
-                             parent->address_space_io);
-    pci_bridge_cleanup_alias(&br->alias_mem,
-                             parent->address_space_mem);
-    pci_bridge_cleanup_alias(&br->alias_pref_mem,
-                             parent->address_space_mem);
+
+    memory_region_del_subregion(parent->address_space_io, &w->alias_io);
+    memory_region_del_subregion(parent->address_space_mem, &w->alias_mem);
+    memory_region_del_subregion(parent->address_space_mem, &w->alias_pref_mem);
+}
+
+static void pci_bridge_region_cleanup(PCIBridge *br, PCIBridgeWindows *w)
+{
+    memory_region_destroy(&w->alias_io);
+    memory_region_destroy(&w->alias_mem);
+    memory_region_destroy(&w->alias_pref_mem);
+    g_free(w);
 }
 
 static void pci_bridge_update_mappings(PCIBridge *br)
 {
+    PCIBridgeWindows *w = br->windows;
+
     /* Make updates atomic to: handle the case of one VCPU updating the bridge
      * while another accesses an unaffected region. */
     memory_region_transaction_begin();
-    pci_bridge_region_cleanup(br);
-    pci_bridge_region_init(br);
+    pci_bridge_region_del(br, br->windows);
+    br->windows = pci_bridge_region_init(br);
     memory_region_transaction_commit();
+    pci_bridge_region_cleanup(br, w);
 }
 
 /* default write_config function for PCI-to-PCI bridge */
@@ -326,7 +331,7 @@ int pci_bridge_initfn(PCIDevice *dev)
     memory_region_init(&br->address_space_mem, "pci_bridge_pci", INT64_MAX);
     sec_bus->address_space_io = &br->address_space_io;
     memory_region_init(&br->address_space_io, "pci_bridge_io", 65536);
-    pci_bridge_region_init(br);
+    br->windows = pci_bridge_region_init(br);
     QLIST_INIT(&sec_bus->child);
     QLIST_INSERT_HEAD(&parent->child, sec_bus, sibling);
     return 0;
@@ -338,7 +343,8 @@ void pci_bridge_exitfn(PCIDevice *pci_dev)
     PCIBridge *s = DO_UPCAST(PCIBridge, dev, pci_dev);
     assert(QLIST_EMPTY(&s->sec_bus.child));
     QLIST_REMOVE(&s->sec_bus, sibling);
-    pci_bridge_region_cleanup(s);
+    pci_bridge_region_del(s, s->windows);
+    pci_bridge_region_cleanup(s, s->windows);
     memory_region_destroy(&s->address_space_mem);
     memory_region_destroy(&s->address_space_io);
     /* qbus_free() is called automatically by qdev_free() */
diff --git a/hw/pci_internals.h b/hw/pci_internals.h
index c931b64..21d0ce6 100644
--- a/hw/pci_internals.h
+++ b/hw/pci_internals.h
@@ -40,6 +40,19 @@ struct PCIBus {
     int *irq_count;
 };
 
+typedef struct PCIBridgeWindows PCIBridgeWindows;
+
+/*
+ * Aliases for each of the address space windows that the bridge
+ * can forward. Mapped into the bridge's parent's address space,
+ * as subregions.
+ */
+struct PCIBridgeWindows {
+    MemoryRegion alias_pref_mem;
+    MemoryRegion alias_mem;
+    MemoryRegion alias_io;
+};
+
 struct PCIBridge {
     PCIDevice dev;
 
@@ -55,14 +68,9 @@ struct PCIBridge {
      */
     MemoryRegion address_space_mem;
     MemoryRegion address_space_io;
-    /*
-     * Aliases for each of the address space windows that the bridge
-     * can forward. Mapped into the bridge's parent's address space,
-     * as subregions.
-     */
-    MemoryRegion alias_pref_mem;
-    MemoryRegion alias_mem;
-    MemoryRegion alias_io;
+
+    PCIBridgeWindows *windows;
+
     pci_map_irq_fn map_irq;
     const char *bus_name;
 };
commit bc927e488c040c8ef2faed323e8a1a3d0ebbfb62
Author: Jason Baron <jbaron at redhat.com>
Date:   Fri Oct 19 16:43:33 2012 -0400

    pcie: Convert PCIExpressHost to use the QOM.
    
    Let's use PCIExpressHost with QOM.
    
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>
    Acked-by: Andreas FÃ¤rber <afaerber at suse.de>
    Signed-off-by: Jason Baron <jbaron at redhat.com>
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/pcie_host.c b/hw/pcie_host.c
index 2231d28..c257fb4 100644
--- a/hw/pcie_host.c
+++ b/hw/pcie_host.c
@@ -145,3 +145,17 @@ void pcie_host_mmcfg_update(PCIExpressHost *e,
         pcie_host_mmcfg_map(e, addr, size);
     }
 }
+
+static const TypeInfo pcie_host_type_info = {
+    .name = TYPE_PCIE_HOST_BRIDGE,
+    .parent = TYPE_PCI_HOST_BRIDGE,
+    .abstract = true,
+    .instance_size = sizeof(PCIExpressHost),
+};
+
+static void pcie_host_register_types(void)
+{
+    type_register_static(&pcie_host_type_info);
+}
+
+type_init(pcie_host_register_types)
diff --git a/hw/pcie_host.h b/hw/pcie_host.h
index 73e88db..3921935 100644
--- a/hw/pcie_host.h
+++ b/hw/pcie_host.h
@@ -24,6 +24,10 @@
 #include "pci_host.h"
 #include "memory.h"
 
+#define TYPE_PCIE_HOST_BRIDGE "pcie-host-bridge"
+#define PCIE_HOST_BRIDGE(obj) \
+    OBJECT_CHECK(PCIExpressHost, (obj), TYPE_PCIE_HOST_BRIDGE)
+
 struct PCIExpressHost {
     PCIHostState pci;
 
commit c702ddb8daece08b16fce9d6654b38304d385f93
Author: Jason Baron <jbaron at redhat.com>
Date:   Fri Oct 19 16:43:32 2012 -0400

    pcie: pass pcie window size to pcie_host_mmcfg_update()
    
    This allows q35 to pass/set the size of the pcie window in its update routine.
    
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Jason Baron <jbaron at redhat.com>
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/pcie_host.c b/hw/pcie_host.c
index 9f7f3d3..2231d28 100644
--- a/hw/pcie_host.c
+++ b/hw/pcie_host.c
@@ -107,14 +107,9 @@ static const MemoryRegionOps pcie_mmcfg_ops = {
 /* pcie_host::base_addr == PCIE_BASE_ADDR_UNMAPPED when it isn't mapped. */
 #define PCIE_BASE_ADDR_UNMAPPED  ((hwaddr)-1ULL)
 
-int pcie_host_init(PCIExpressHost *e, uint32_t size)
+int pcie_host_init(PCIExpressHost *e)
 {
-    assert(!(size & (size - 1)));       /* power of 2 */
-    assert(size >= PCIE_MMCFG_SIZE_MIN);
-    assert(size <= PCIE_MMCFG_SIZE_MAX);
     e->base_addr = PCIE_BASE_ADDR_UNMAPPED;
-    e->size = size;
-    memory_region_init_io(&e->mmio, &pcie_mmcfg_ops, e, "pcie-mmcfg", e->size);
 
     return 0;
 }
@@ -123,22 +118,30 @@ void pcie_host_mmcfg_unmap(PCIExpressHost *e)
 {
     if (e->base_addr != PCIE_BASE_ADDR_UNMAPPED) {
         memory_region_del_subregion(get_system_memory(), &e->mmio);
+        memory_region_destroy(&e->mmio);
         e->base_addr = PCIE_BASE_ADDR_UNMAPPED;
     }
 }
 
-void pcie_host_mmcfg_map(PCIExpressHost *e, hwaddr addr)
+void pcie_host_mmcfg_map(PCIExpressHost *e, hwaddr addr,
+                         uint32_t size)
 {
+    assert(!(size & (size - 1)));       /* power of 2 */
+    assert(size >= PCIE_MMCFG_SIZE_MIN);
+    assert(size <= PCIE_MMCFG_SIZE_MAX);
+    e->size = size;
+    memory_region_init_io(&e->mmio, &pcie_mmcfg_ops, e, "pcie-mmcfg", e->size);
     e->base_addr = addr;
     memory_region_add_subregion(get_system_memory(), e->base_addr, &e->mmio);
 }
 
 void pcie_host_mmcfg_update(PCIExpressHost *e,
                             int enable,
-                            hwaddr addr)
+                            hwaddr addr,
+                            uint32_t size)
 {
     pcie_host_mmcfg_unmap(e);
     if (enable) {
-        pcie_host_mmcfg_map(e, addr);
+        pcie_host_mmcfg_map(e, addr, size);
     }
 }
diff --git a/hw/pcie_host.h b/hw/pcie_host.h
index 9978b9f..73e88db 100644
--- a/hw/pcie_host.h
+++ b/hw/pcie_host.h
@@ -39,11 +39,12 @@ struct PCIExpressHost {
     MemoryRegion mmio;
 };
 
-int pcie_host_init(PCIExpressHost *e, uint32_t size);
+int pcie_host_init(PCIExpressHost *e);
 void pcie_host_mmcfg_unmap(PCIExpressHost *e);
-void pcie_host_mmcfg_map(PCIExpressHost *e, hwaddr addr);
+void pcie_host_mmcfg_map(PCIExpressHost *e, hwaddr addr, uint32_t size);
 void pcie_host_mmcfg_update(PCIExpressHost *e,
                             int enable,
-                            hwaddr addr);
+                            hwaddr addr,
+                            uint32_t size);
 
 #endif /* PCIE_HOST_H */
commit f7748569902f4854ac1223c143edbde4f588040f
Author: Jan Kiszka <jan.kiszka at siemens.com>
Date:   Fri Oct 19 16:43:31 2012 -0400

    pci: Add class 0xc05 as 'SMBus'
    
    [jbaron at redhat.com: add PCI_CLASS_SERIAL_SMBUS definition]
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Jan Kiszka <jan.kiszka at siemens.com>
    Signed-off-by: Jason Baron <jbaron at redhat.com>
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/pci.c b/hw/pci.c
index 63b1857..dceda0b 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1237,6 +1237,7 @@ static const pci_class_desc pci_class_descriptions[] =
     { 0x0c02, "SSA controller", "ssa"},
     { 0x0c03, "USB controller", "usb"},
     { 0x0c04, "Fibre channel controller", "fibre-channel"},
+    { 0x0c05, "SMBus"},
     { 0, NULL}
 };
 
diff --git a/hw/pci_ids.h b/hw/pci_ids.h
index 0bfc28f..41f3570 100644
--- a/hw/pci_ids.h
+++ b/hw/pci_ids.h
@@ -31,6 +31,7 @@
 #define PCI_CLASS_SYSTEM_OTHER           0x0880
 
 #define PCI_CLASS_SERIAL_USB             0x0c03
+#define PCI_CLASS_SERIAL_SMBUS           0x0c05
 
 #define PCI_CLASS_BRIDGE_HOST            0x0600
 #define PCI_CLASS_BRIDGE_ISA             0x0601
commit 91e5615984c1fd5674caad343e750bb5ecd17995
Author: Isaku Yamahata <yamahata at valinux.co.jp>
Date:   Fri Oct 19 16:43:28 2012 -0400

    pci: introduce pci_swizzle_map_irq_fn() for standardized interrupt pin swizzle
    
    Introduce pci_swizzle_map_irq_fn() for interrupt pin swizzle which is
    standardized. PCI bridge swizzle is common logic, by introducing
    this function duplicated swizzle logic will be avoided later.
    
    [jbaron at redhat.com: drop opaque argument]
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Isaku Yamahata <yamahata at valinux.co.jp>
    Signed-off-by: Jason Baron <jbaron at redhat.com>
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/pci.c b/hw/pci.c
index 82c971a..63b1857 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1155,6 +1155,24 @@ void pci_device_set_intx_routing_notifier(PCIDevice *dev,
     dev->intx_routing_notifier = notifier;
 }
 
+/*
+ * PCI-to-PCI bridge specification
+ * 9.1: Interrupt routing. Table 9-1
+ *
+ * the PCI Express Base Specification, Revision 2.1
+ * 2.2.8.1: INTx interrutp signaling - Rules
+ *          the Implementation Note
+ *          Table 2-20
+ */
+/*
+ * 0 <= pin <= 3 0 = INTA, 1 = INTB, 2 = INTC, 3 = INTD
+ * 0-origin unlike PCI interrupt pin register.
+ */
+int pci_swizzle_map_irq_fn(PCIDevice *pci_dev, int pin)
+{
+    return (pin + PCI_SLOT(pci_dev->devfn)) % PCI_NUM_PINS;
+}
+
 /***********************************************************/
 /* monitor info on PCI */
 
diff --git a/hw/pci.h b/hw/pci.h
index a852941..241c1d8 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -318,6 +318,8 @@ void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
                   void *irq_opaque, int nirq);
 int pci_bus_get_irq_level(PCIBus *bus, int irq_num);
 void pci_bus_hotplug(PCIBus *bus, pci_hotplug_fn hotplug, DeviceState *dev);
+/* 0 <= pin <= 3 0 = INTA, 1 = INTB, 2 = INTC, 3 = INTD */
+int pci_swizzle_map_irq_fn(PCIDevice *pci_dev, int pin);
 PCIBus *pci_register_bus(DeviceState *parent, const char *name,
                          pci_set_irq_fn set_irq, pci_map_irq_fn map_irq,
                          void *irq_opaque,
commit 9e38f56183c52e06fc29c64691f59a46d246eec5
Author: Isaku Yamahata <yamahata at valinux.co.jp>
Date:   Fri Oct 19 16:43:31 2012 -0400

    pci_ids: add intel 82801BA pci-to-pci bridge id
    
    Adds pci id constants which will be used by q35.
    
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Isaku Yamahata <yamahata at valinux.co.jp>
    Signed-off-by: Jason Baron <jbaron at redhat.com>
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/pci_ids.h b/hw/pci_ids.h
index c017a79..0bfc28f 100644
--- a/hw/pci_ids.h
+++ b/hw/pci_ids.h
@@ -105,6 +105,7 @@
 #define PCI_DEVICE_ID_INTEL_82378        0x0484
 #define PCI_DEVICE_ID_INTEL_82441        0x1237
 #define PCI_DEVICE_ID_INTEL_82801AA_5    0x2415
+#define PCI_DEVICE_ID_INTEL_82801BA_11   0x244e
 #define PCI_DEVICE_ID_INTEL_82801D       0x24CD
 #define PCI_DEVICE_ID_INTEL_ESB_9        0x25ab
 #define PCI_DEVICE_ID_INTEL_82371SB_0    0x7000
commit b56d701f1d1f1828c9fabea535b3460857546dd0
Author: Isaku Yamahata <yamahata at valinux.co.jp>
Date:   Fri Oct 19 16:43:28 2012 -0400

    pci: pci capability must be in PCI space
    
    pci capability must be in PCI space.
    It can't lay in PCIe extended config space.
    
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Isaku Yamahata <yamahata at valinux.co.jp>
    Signed-off-by: Jason Baron <jbaron at redhat.com>
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/pci.c b/hw/pci.c
index 35cb374..82c971a 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1678,16 +1678,16 @@ PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name)
     return pci_create_simple_multifunction(bus, devfn, false, name);
 }
 
-static int pci_find_space(PCIDevice *pdev, uint8_t size)
+static uint8_t pci_find_space(PCIDevice *pdev, uint8_t size)
 {
-    int config_size = pci_config_size(pdev);
     int offset = PCI_CONFIG_HEADER_SIZE;
     int i;
-    for (i = PCI_CONFIG_HEADER_SIZE; i < config_size; ++i)
+    for (i = PCI_CONFIG_HEADER_SIZE; i < PCI_CONFIG_SPACE_SIZE; ++i) {
         if (pdev->used[i])
             offset = i + 1;
         else if (i - offset + 1 == size)
             return offset;
+    }
     return 0;
 }
 
commit e26631b74663f59b5873a84ed5b92ee4ddf48255
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Mon Oct 22 12:35:00 2012 +0200

    pci: make each capability DWORD aligned
    
    PCI spec (see e.g. 6.7 Capabilities List in spec rev 3.0)
    requires that each capability is DWORD aligned.
    Ensure this when allocating space by rounding size up to 4.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/pci.c b/hw/pci.c
index 70b5fd9..35cb374 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1906,7 +1906,7 @@ int pci_add_capability(PCIDevice *pdev, uint8_t cap_id,
     config[PCI_CAP_LIST_NEXT] = pdev->config[PCI_CAPABILITY_LIST];
     pdev->config[PCI_CAPABILITY_LIST] = offset;
     pdev->config[PCI_STATUS] |= PCI_STATUS_CAP_LIST;
-    memset(pdev->used + offset, 0xFF, size);
+    memset(pdev->used + offset, 0xFF, QEMU_ALIGN_UP(size, 4));
     /* Make capability read-only by default */
     memset(pdev->wmask + offset, 0, size);
     /* Check capability by default */
@@ -1926,7 +1926,7 @@ void pci_del_capability(PCIDevice *pdev, uint8_t cap_id, uint8_t size)
     memset(pdev->w1cmask + offset, 0, size);
     /* Clear cmask as device-specific registers can't be checked */
     memset(pdev->cmask + offset, 0, size);
-    memset(pdev->used + offset, 0, size);
+    memset(pdev->used + offset, 0, QEMU_ALIGN_UP(size, 4));
 
     if (!pdev->config[PCI_CAPABILITY_LIST])
         pdev->config[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
commit dc59944bc9a5ad784572eea57610de60e4a2f4e5
Author: Michael S. Tsirkin <mst at redhat.com>
Date:   Thu Oct 18 00:15:48 2012 +0200

    qemu: enable PV EOI for qemu 1.3
    
    Enable KVM PV EOI by default. You can still disable it with
    -kvm_pv_eoi cpu flag. To avoid breaking cross-version migration,
    enable only for qemu 1.3 (or in the future, newer) machine type.
    
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index c7dd75b..85529b2 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -43,6 +43,7 @@
 #include "xen.h"
 #include "memory.h"
 #include "exec-memory.h"
+#include "cpu.h"
 #ifdef CONFIG_XEN
 #  include <xen/hvm/hvm_info_table.h>
 #endif
@@ -302,6 +303,12 @@ static void pc_init_pci(QEMUMachineInitArgs *args)
              initrd_filename, cpu_model, 1, 1);
 }
 
+static void pc_init_pci_1_3(QEMUMachineInitArgs *args)
+{
+    enable_kvm_pv_eoi();
+    pc_init_pci(args);
+}
+
 static void pc_init_pci_no_kvmclock(QEMUMachineInitArgs *args)
 {
     ram_addr_t ram_size = args->ram_size;
@@ -349,7 +356,7 @@ static QEMUMachine pc_machine_v1_3 = {
     .name = "pc-1.3",
     .alias = "pc",
     .desc = "Standard PC",
-    .init = pc_init_pci,
+    .init = pc_init_pci_1_3,
     .max_cpus = 255,
     .is_default = 1,
 };
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 6411042..d4f2e65 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -125,6 +125,25 @@ typedef struct model_features_t {
 int check_cpuid = 0;
 int enforce_cpuid = 0;
 
+#if defined(CONFIG_KVM)
+static uint32_t kvm_default_features = (1 << KVM_FEATURE_CLOCKSOURCE) |
+        (1 << KVM_FEATURE_NOP_IO_DELAY) |
+        (1 << KVM_FEATURE_MMU_OP) |
+        (1 << KVM_FEATURE_CLOCKSOURCE2) |
+        (1 << KVM_FEATURE_ASYNC_PF) |
+        (1 << KVM_FEATURE_STEAL_TIME) |
+        (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
+static const uint32_t kvm_pv_eoi_features = (0x1 << KVM_FEATURE_PV_EOI);
+#else
+static uint32_t kvm_default_features = 0;
+static const uint32_t kvm_pv_eoi_features = 0;
+#endif
+
+void enable_kvm_pv_eoi(void)
+{
+    kvm_default_features |= kvm_pv_eoi_features;
+}
+
 void host_cpuid(uint32_t function, uint32_t count,
                 uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
 {
@@ -1108,7 +1127,7 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
     /* Features to be added*/
     uint32_t plus_features = 0, plus_ext_features = 0;
     uint32_t plus_ext2_features = 0, plus_ext3_features = 0;
-    uint32_t plus_kvm_features = 0, plus_svm_features = 0;
+    uint32_t plus_kvm_features = kvm_default_features, plus_svm_features = 0;
     uint32_t plus_7_0_ebx_features = 0;
     /* Features to be removed */
     uint32_t minus_features = 0, minus_ext_features = 0;
@@ -1128,18 +1147,6 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
         memcpy(x86_cpu_def, def, sizeof(*def));
     }
 
-#if defined(CONFIG_KVM)
-    plus_kvm_features = (1 << KVM_FEATURE_CLOCKSOURCE) |
-        (1 << KVM_FEATURE_NOP_IO_DELAY) | 
-        (1 << KVM_FEATURE_MMU_OP) |
-        (1 << KVM_FEATURE_CLOCKSOURCE2) |
-        (1 << KVM_FEATURE_ASYNC_PF) | 
-        (1 << KVM_FEATURE_STEAL_TIME) |
-        (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT);
-#else
-    plus_kvm_features = 0;
-#endif
-
     add_flagname_to_bitmaps("hypervisor", &plus_features,
             &plus_ext_features, &plus_ext2_features, &plus_ext3_features,
             &plus_kvm_features, &plus_svm_features,  &plus_7_0_ebx_features);
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 871c270..de33303 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -1188,4 +1188,6 @@ void do_smm_enter(CPUX86State *env1);
 
 void cpu_report_tpr_access(CPUX86State *env, TPRAccess access);
 
+void enable_kvm_pv_eoi(void);
+
 #endif /* CPU_I386_H */
commit 05c0621e64b425d9f89bef542f0b85e61dc57ff8
Author: Alex Williamson <alex.williamson at redhat.com>
Date:   Wed Oct 17 16:13:12 2012 -0600

    pci: Return PCI_INTX_DISABLED when no bus INTx routing support
    
    Rather than assert, simply return PCI_INTX_DISABLED when we don't
    have a pci_route_irq_fn.  PIIX already returns DISABLED for an
    invalid pin, so users already deal with this state.  Users of this
    interface should only be acting on an ENABLED or INVERTED return
    value (though we really have no support for INVERTED).  Also
    complain loudly when we hit this so we don't forget it's missing.
    
    Signed-off-by: Alex Williamson <alex.williamson at redhat.com>
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>
    Acked-by: Jan Kiszka <jan.kiszka at siemens.com>

diff --git a/hw/pci.c b/hw/pci.c
index 8c6b3d1..70b5fd9 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1117,7 +1117,13 @@ PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
          pin = bus->map_irq(dev, pin);
          dev = bus->parent_dev;
     } while (dev);
-    assert(bus->route_intx_to_irq);
+
+    if (!bus->route_intx_to_irq) {
+        error_report("PCI: Bug - unimplemented PCI INTx routing (%s)\n",
+                     object_get_typename(OBJECT(bus->qbus.parent)));
+        return (PCIINTxRoute) { PCI_INTX_DISABLED, -1 };
+    }
+
     return bus->route_intx_to_irq(bus->irq_opaque, pin);
 }
 
commit 2b199f9318ec698dfced65f5d4ad9729f166e37c
Author: Alex Williamson <alex.williamson at redhat.com>
Date:   Tue Oct 2 13:22:14 2012 -0600

    pci-assign: Use msi_get_message()
    
    pci-assign only uses a subset of the flexibility msi_get_message()
    provides, but it's still worthwhile to use it.
    
    Signed-off-by: Alex Williamson <alex.williamson at redhat.com>
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/kvm/pci-assign.c b/hw/kvm/pci-assign.c
index ab01205..e80dad0 100644
--- a/hw/kvm/pci-assign.c
+++ b/hw/kvm/pci-assign.c
@@ -996,12 +996,9 @@ static void assigned_dev_update_msi(PCIDevice *pci_dev)
     }
 
     if (ctrl_byte & PCI_MSI_FLAGS_ENABLE) {
-        uint8_t *pos = pci_dev->config + pci_dev->msi_cap;
-        MSIMessage msg;
+        MSIMessage msg = msi_get_message(pci_dev, 0);
         int virq;
 
-        msg.address = pci_get_long(pos + PCI_MSI_ADDRESS_LO);
-        msg.data = pci_get_word(pos + PCI_MSI_DATA_32);
         virq = kvm_irqchip_add_msi_route(kvm_state, msg);
         if (virq < 0) {
             perror("assigned_dev_update_msi: kvm_irqchip_add_msi_route");
commit 39b9bc626a436837f0068cb600aaf91c0e8aa937
Author: Alex Williamson <alex.williamson at redhat.com>
Date:   Tue Oct 2 13:22:07 2012 -0600

    msi: Add msi_get_message()
    
    vfio-pci and pci-assign both do this on their own for setting up
    direct MSI injection through KVM.  Provide a helper function for
    this in MSI code.
    
    Signed-off-by: Alex Williamson <alex.williamson at redhat.com>
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/msi.c b/hw/msi.c
index e2273a0..33037a8 100644
--- a/hw/msi.c
+++ b/hw/msi.c
@@ -122,6 +122,31 @@ void msi_set_message(PCIDevice *dev, MSIMessage msg)
     pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data);
 }
 
+MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector)
+{
+    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
+    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
+    unsigned int nr_vectors = msi_nr_vectors(flags);
+    MSIMessage msg;
+
+    assert(vector < nr_vectors);
+
+    if (msi64bit) {
+        msg.address = pci_get_quad(dev->config + msi_address_lo_off(dev));
+    } else {
+        msg.address = pci_get_long(dev->config + msi_address_lo_off(dev));
+    }
+
+    /* upper bit 31:16 is zero */
+    msg.data = pci_get_word(dev->config + msi_data_off(dev, msi64bit));
+    if (nr_vectors > 1) {
+        msg.data &= ~(nr_vectors - 1);
+        msg.data |= vector;
+    }
+
+    return msg;
+}
+
 bool msi_enabled(const PCIDevice *dev)
 {
     return msi_present(dev) &&
@@ -249,8 +274,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
     uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
     bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
     unsigned int nr_vectors = msi_nr_vectors(flags);
-    uint64_t address;
-    uint32_t data;
+    MSIMessage msg;
 
     assert(vector < nr_vectors);
     if (msi_is_masked(dev, vector)) {
@@ -261,24 +285,13 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
         return;
     }
 
-    if (msi64bit) {
-        address = pci_get_quad(dev->config + msi_address_lo_off(dev));
-    } else {
-        address = pci_get_long(dev->config + msi_address_lo_off(dev));
-    }
-
-    /* upper bit 31:16 is zero */
-    data = pci_get_word(dev->config + msi_data_off(dev, msi64bit));
-    if (nr_vectors > 1) {
-        data &= ~(nr_vectors - 1);
-        data |= vector;
-    }
+    msg = msi_get_message(dev, vector);
 
     MSI_DEV_PRINTF(dev,
                    "notify vector 0x%x"
                    " address: 0x%"PRIx64" data: 0x%"PRIx32"\n",
-                   vector, address, data);
-    stl_le_phys(address, data);
+                   vector, msg.address, msg.data);
+    stl_le_phys(msg.address, msg.data);
 }
 
 /* Normally called by pci_default_write_config(). */
diff --git a/hw/msi.h b/hw/msi.h
index 6ec1f99..150b09a 100644
--- a/hw/msi.h
+++ b/hw/msi.h
@@ -32,6 +32,7 @@ struct MSIMessage {
 extern bool msi_supported;
 
 void msi_set_message(PCIDevice *dev, MSIMessage msg);
+MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector);
 bool msi_enabled(const PCIDevice *dev);
 int msi_init(struct PCIDevice *dev, uint8_t offset,
              unsigned int nr_vectors, bool msi64bit, bool msi_per_vector_mask);
commit 4774d7b258e0c4a6595a7b0bc6960c1751365bbf
Author: Alex Williamson <alex.williamson at redhat.com>
Date:   Tue Oct 2 13:22:01 2012 -0600

    pci-assign: Use pci_intx_route_changed()
    
    Replace open coded version
    
    Signed-off-by: Alex Williamson <alex.williamson at redhat.com>
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/kvm/pci-assign.c b/hw/kvm/pci-assign.c
index bfffbab..ab01205 100644
--- a/hw/kvm/pci-assign.c
+++ b/hw/kvm/pci-assign.c
@@ -882,8 +882,7 @@ static int assign_intx(AssignedDevice *dev)
     intx_route = pci_device_route_intx_to_irq(&dev->dev, dev->intpin);
     assert(intx_route.mode != PCI_INTX_INVERTED);
 
-    if (dev->intx_route.mode == intx_route.mode &&
-        dev->intx_route.irq == intx_route.irq) {
+    if (!pci_intx_route_changed(&dev->intx_route, &intx_route)) {
         return 0;
     }
 
commit d6e65d54f0fa980f891aa3166d85b6ffd7d541eb
Author: Alex Williamson <alex.williamson at redhat.com>
Date:   Tue Oct 2 13:21:54 2012 -0600

    pci: Helper function for testing if an INTx route changed
    
    Signed-off-by: Alex Williamson <alex.williamson at redhat.com>
    Signed-off-by: Michael S. Tsirkin <mst at redhat.com>

diff --git a/hw/pci.c b/hw/pci.c
index d44fd0e..8c6b3d1 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1121,6 +1121,11 @@ PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin)
     return bus->route_intx_to_irq(bus->irq_opaque, pin);
 }
 
+bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new)
+{
+    return old->mode != new->mode || old->irq != new->irq;
+}
+
 void pci_bus_fire_intx_routing_notifier(PCIBus *bus)
 {
     PCIDevice *dev;
diff --git a/hw/pci.h b/hw/pci.h
index 1f902f5..a852941 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -326,6 +326,7 @@ PCIBus *pci_register_bus(DeviceState *parent, const char *name,
                          uint8_t devfn_min, int nirq);
 void pci_bus_set_route_irq_fn(PCIBus *, pci_route_irq_fn);
 PCIINTxRoute pci_device_route_intx_to_irq(PCIDevice *dev, int pin);
+bool pci_intx_route_changed(PCIINTxRoute *old, PCIINTxRoute *new);
 void pci_bus_fire_intx_routing_notifier(PCIBus *bus);
 void pci_device_set_intx_routing_notifier(PCIDevice *dev,
                                           PCIINTxRoutingNotifier notifier);
commit 6b0e6468e33883c1739a7474fe6baca170eec9aa
Merge: 90c45b3... 0ebfb14...
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Mon Oct 29 10:34:29 2012 -0500

    Merge remote-tracking branch 'kraxel/usb.68' into staging
    
    * kraxel/usb.68: (36 commits)
      xhci: fix usb name in caps
      xhci: make number of interrupters and slots configurable
      xhci: allow disabling interrupters
      xhci: flush endpoint context unconditinally
      xhci: fix function name in error message
      uhci: Use only one queue for ctrl endpoints
      uhci: Retry to fill the queue while waiting for td completion
      uhci: Always mark a queue valid when we encounter it
      uhci: When the guest marks a pending td non-active, cancel the queue
      uhci: Detect guest td re-use
      uhci: Verify queue has not been changed by guest
      uhci: Immediately free queues on device disconnect
      uhci: Store ep in UHCIQueue
      uhci: Make uhci_fill_queue() actually operate on an UHCIQueue
      uhci: Add uhci_read_td() helper function
      uhci: Rename UHCIAsync->td to UHCIAsync->td_addr
      uhci: Move emptying of the queue's asyncs' queue to uhci_queue_free
      uhci: Drop unnecessary forward declaration of some static functions
      uhci: Don't retry on error
      uhci: cleanup: Add an unlink call to uhci_async_cancel()
      ...
    
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

commit 90c45b30319dee7d4137f18817395454728ea2f9
Merge: d262cb0... 3b6eda2...
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Mon Oct 29 10:34:05 2012 -0500

    Merge remote-tracking branch 'kwolf/for-anthony' into staging
    
    * kwolf/for-anthony: (32 commits)
      osdep: Less restrictive F_SEFL in qemu_dup_flags()
      qemu-iotests: add testcases for mirroring on-source-error/on-target-error
      qmp: add pull_event function
      mirror: add support for on-source-error/on-target-error
      iostatus: forward block_job_iostatus_reset to block job
      qemu-iotests: add mirroring test case
      mirror: implement completion
      qmp: add drive-mirror command
      mirror: introduce mirror job
      block: introduce BLOCK_JOB_READY event
      block: add block-job-complete
      block: rename block_job_complete to block_job_completed
      block: export dirty bitmap information in query-block
      block: introduce new dirty bitmap functionality
      block: add bdrv_open_backing_file
      block: add bdrv_query_stats
      block: add bdrv_query_info
      qemu-config: Add new -add-fd command line option
      monitor: Prevent removing fd from set during init
      monitor: Enable adding an inherited fd to an fd set
      ...
    
    Conflicts:
    	vl.c
    
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --cc vl.c
index 6dd767c,b870caf..5a3d316
--- a/vl.c
+++ b/vl.c
@@@ -789,17 -790,78 +789,89 @@@ static int parse_sandbox(QemuOpts *opts
      return 0;
  }
  
 +/*********QEMU USB setting******/
 +bool usb_enabled(bool default_usb)
 +{
 +    QemuOpts *mach_opts;
 +    mach_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
 +    if (mach_opts) {
 +        return qemu_opt_get_bool(mach_opts, "usb", default_usb);
 +    }
 +    return default_usb;
 +}
 +
+ #ifndef _WIN32
+ static int parse_add_fd(QemuOpts *opts, void *opaque)
+ {
+     int fd, dupfd, flags;
+     int64_t fdset_id;
+     const char *fd_opaque = NULL;
+ 
+     fd = qemu_opt_get_number(opts, "fd", -1);
+     fdset_id = qemu_opt_get_number(opts, "set", -1);
+     fd_opaque = qemu_opt_get(opts, "opaque");
+ 
+     if (fd < 0) {
+         qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                       "fd option is required and must be non-negative");
+         return -1;
+     }
+ 
+     if (fd <= STDERR_FILENO) {
+         qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                       "fd cannot be a standard I/O stream");
+         return -1;
+     }
+ 
+     /*
+      * All fds inherited across exec() necessarily have FD_CLOEXEC
+      * clear, while qemu sets FD_CLOEXEC on all other fds used internally.
+      */
+     flags = fcntl(fd, F_GETFD);
+     if (flags == -1 || (flags & FD_CLOEXEC)) {
+         qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                       "fd is not valid or already in use");
+         return -1;
+     }
+ 
+     if (fdset_id < 0) {
+         qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                       "set option is required and must be non-negative");
+         return -1;
+     }
+ 
+ #ifdef F_DUPFD_CLOEXEC
+     dupfd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
+ #else
+     dupfd = dup(fd);
+     if (dupfd != -1) {
+         qemu_set_cloexec(dupfd);
+     }
+ #endif
+     if (dupfd == -1) {
+         qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                       "Error duplicating fd: %s", strerror(errno));
+         return -1;
+     }
+ 
+     /* add the duplicate fd, and optionally the opaque string, to the fd set */
+     monitor_fdset_add_fd(dupfd, true, fdset_id, fd_opaque ? true : false,
+                          fd_opaque, NULL);
+ 
+     return 0;
+ }
+ 
+ static int cleanup_add_fd(QemuOpts *opts, void *opaque)
+ {
+     int fd;
+ 
+     fd = qemu_opt_get_number(opts, "fd", -1);
+     close(fd);
+ 
+     return 0;
+ }
+ #endif
+ 
  /***********************************************************/
  /* QEMU Block devices */
  
commit d262cb02861dd33375c08fc798930653b14769e9
Merge: 3f4331b... a178274...
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Mon Oct 29 14:56:17 2012 +0100

    Merge branch 'ppc-for-upstream' of git://repo.or.cz/qemu/agraf
    
    * 'ppc-for-upstream' of git://repo.or.cz/qemu/agraf: (22 commits)
      PPC: pseries: Remove hack for PIO window
      PPC: e500: Map PIO space into core memory region
      xen_platform: convert PIO to new memory api read/write
      vmport: convert PIO to new memory api read/write
      serial: convert PIO to new memory api read/write
      rtl8139: convert PIO to new memory api read/write
      pckbd: convert PIO to new memory api read/write
      pc port92: convert PIO to new memory api read/write
      mc146818rtc: convert PIO to new memory api read/write
      m48t59: convert PIO to new memory api read/write
      i8254: convert PIO to new memory api read/write
      es1370: convert PIO to new memory api read/write
      virtio-pci: convert PIO to new memory api read/write
      ac97: convert PIO to new memory api read/write
      pseries: Implement qemu initiated shutdowns using EPOW events
      target-ppc: Rework storage of VPA registration state
      pseries: Don't allow duplicate registration of hcalls or RTAS calls
      Add USB option in machine options
      e500: Fix serial initialization
      PPC: 440: Emulate DCBR0
      ...

commit 3f4331bfd112840e94935b0fd098ee88c07beabc
Merge: 01f590d... 29ed72f...
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Mon Oct 29 14:55:51 2012 +0100

    Merge branch 'queue/qmp' of git://repo.or.cz/qemu/qmp-unstable
    
    * 'queue/qmp' of git://repo.or.cz/qemu/qmp-unstable:
      migration: go to paused state after finishing incoming migration with -S
      qmp: handle stop/cont in INMIGRATE state
      hmp: fix info cpus for sparc targets

commit a178274efabcbbc5d44805b51def874e47051325
Author: Alexander Graf <agraf at suse.de>
Date:   Mon Oct 8 13:54:18 2012 +0200

    PPC: pseries: Remove hack for PIO window
    
    Now that all users of old_portio are gone, we can remove the hack
    that enabled us to support them.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
index a08ed11..c2c3079 100644
--- a/hw/spapr_pci.c
+++ b/hw/spapr_pci.c
@@ -439,43 +439,6 @@ static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
     qemu_set_irq(spapr_phb_lsi_qirq(phb, irq_num), level);
 }
 
-static uint64_t spapr_io_read(void *opaque, hwaddr addr,
-                              unsigned size)
-{
-    switch (size) {
-    case 1:
-        return cpu_inb(addr);
-    case 2:
-        return cpu_inw(addr);
-    case 4:
-        return cpu_inl(addr);
-    }
-    assert(0);
-}
-
-static void spapr_io_write(void *opaque, hwaddr addr,
-                           uint64_t data, unsigned size)
-{
-    switch (size) {
-    case 1:
-        cpu_outb(addr, data);
-        return;
-    case 2:
-        cpu_outw(addr, data);
-        return;
-    case 4:
-        cpu_outl(addr, data);
-        return;
-    }
-    assert(0);
-}
-
-static const MemoryRegionOps spapr_io_ops = {
-    .endianness = DEVICE_LITTLE_ENDIAN,
-    .read = spapr_io_read,
-    .write = spapr_io_write
-};
-
 /*
  * MSI/MSIX memory region implementation.
  * The handler handles both MSI and MSIX.
@@ -545,14 +508,9 @@ static int spapr_phb_init(SysBusDevice *s)
      * old_portion are updated */
     sprintf(namebuf, "%s.io", sphb->dtbusname);
     memory_region_init(&sphb->iospace, namebuf, SPAPR_PCI_IO_WIN_SIZE);
-    /* FIXME: fix to support multiple PHBs */
-    memory_region_add_subregion(get_system_io(), 0, &sphb->iospace);
 
-    sprintf(namebuf, "%s.io-alias", sphb->dtbusname);
-    memory_region_init_io(&sphb->iowindow, &spapr_io_ops, sphb,
-                          namebuf, SPAPR_PCI_IO_WIN_SIZE);
     memory_region_add_subregion(get_system_memory(), sphb->io_win_addr,
-                                &sphb->iowindow);
+                                &sphb->iospace);
 
     /* As MSI/MSIX interrupts trigger by writing at MSI/MSIX vectors,
      * we need to allocate some memory to catch those writes coming
diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h
index e307ac8..a77d7d5 100644
--- a/hw/spapr_pci.h
+++ b/hw/spapr_pci.h
@@ -44,7 +44,7 @@ typedef struct sPAPRPHBState {
     MemoryRegion memspace, iospace;
     hwaddr mem_win_addr, mem_win_size, io_win_addr, io_win_size;
     hwaddr msi_win_addr;
-    MemoryRegion memwindow, iowindow, msiwindow;
+    MemoryRegion memwindow, msiwindow;
 
     uint32_t dma_liobn;
     uint64_t dma_window_start;
commit a1bc20dfbb012ea2a5fb1228cb77abd04490fd79
Author: Alexander Graf <agraf at suse.de>
Date:   Mon Oct 8 12:21:30 2012 +0200

    PPC: e500: Map PIO space into core memory region
    
    On PPC, we don't have PIO. So usually PIO space behind a PCI bridge is
    accessible via MMIO. Do this mapping explicitly by mapping the PIO space
    of our PCI bus into a memory region that lives in memory space.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 90d88eb..6749fff 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -52,7 +52,6 @@
 #define MPC8544_PCI_REGS_BASE      (MPC8544_CCSRBAR_BASE + 0x8000ULL)
 #define MPC8544_PCI_REGS_SIZE      0x1000ULL
 #define MPC8544_PCI_IO             0xE1000000ULL
-#define MPC8544_PCI_IOLEN          0x10000ULL
 #define MPC8544_UTIL_BASE          (MPC8544_CCSRBAR_BASE + 0xe0000ULL)
 #define MPC8544_SPIN_BASE          0xEF000000ULL
 
@@ -511,7 +510,7 @@ void ppce500_init(PPCE500Params *params)
     if (!pci_bus)
         printf("couldn't create PCI controller!\n");
 
-    isa_mmio_init(MPC8544_PCI_IO, MPC8544_PCI_IOLEN);
+    sysbus_mmio_map(sysbus_from_qdev(dev), 1, MPC8544_PCI_IO);
 
     if (pci_bus) {
         /* Register network interfaces. */
diff --git a/hw/ppce500_pci.c b/hw/ppce500_pci.c
index 332748a..2ff7438 100644
--- a/hw/ppce500_pci.c
+++ b/hw/ppce500_pci.c
@@ -31,6 +31,8 @@
 #define PCIE500_ALL_SIZE      0x1000
 #define PCIE500_REG_SIZE      (PCIE500_ALL_SIZE - PCIE500_REG_BASE)
 
+#define PCIE500_PCI_IOLEN     0x10000ULL
+
 #define PPCE500_PCI_CONFIG_ADDR         0x0
 #define PPCE500_PCI_CONFIG_DATA         0x4
 #define PPCE500_PCI_INTACK              0x8
@@ -87,6 +89,7 @@ struct PPCE500PCIState {
     /* mmio maps */
     MemoryRegion container;
     MemoryRegion iomem;
+    MemoryRegion pio;
 };
 
 typedef struct PPCE500PCIState PPCE500PCIState;
@@ -314,7 +317,6 @@ static int e500_pcihost_initfn(SysBusDevice *dev)
     PCIBus *b;
     int i;
     MemoryRegion *address_space_mem = get_system_memory();
-    MemoryRegion *address_space_io = get_system_io();
 
     h = PCI_HOST_BRIDGE(dev);
     s = PPC_E500_PCI_HOST_BRIDGE(dev);
@@ -323,9 +325,11 @@ static int e500_pcihost_initfn(SysBusDevice *dev)
         sysbus_init_irq(dev, &s->irq[i]);
     }
 
+    memory_region_init(&s->pio, "pci-pio", PCIE500_PCI_IOLEN);
+
     b = pci_register_bus(DEVICE(dev), NULL, mpc85xx_pci_set_irq,
                          mpc85xx_pci_map_irq, s->irq, address_space_mem,
-                         address_space_io, PCI_DEVFN(0x11, 0), 4);
+                         &s->pio, PCI_DEVFN(0x11, 0), 4);
     h->bus = b;
 
     pci_create_simple(b, 0, "e500-host-bridge");
@@ -341,6 +345,7 @@ static int e500_pcihost_initfn(SysBusDevice *dev)
     memory_region_add_subregion(&s->container, PCIE500_CFGDATA, &h->data_mem);
     memory_region_add_subregion(&s->container, PCIE500_REG_BASE, &s->iomem);
     sysbus_init_mmio(dev, &s->container);
+    sysbus_init_mmio(dev, &s->pio);
 
     return 0;
 }
commit 626c7a171e644fbe1579516b8b794d611c295d2f
Author: Alexander Graf <agraf at suse.de>
Date:   Mon Oct 8 13:47:30 2012 +0200

    xen_platform: convert PIO to new memory api read/write
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/xen_platform.c b/hw/xen_platform.c
index 890eb72..a54e7a2 100644
--- a/hw/xen_platform.c
+++ b/hw/xen_platform.c
@@ -228,18 +228,46 @@ static void platform_fixed_ioport_reset(void *opaque)
     platform_fixed_ioport_writeb(s, 0, 0);
 }
 
-const MemoryRegionPortio xen_platform_ioport[] = {
-    { 0, 16, 4, .write = platform_fixed_ioport_writel, },
-    { 0, 16, 2, .write = platform_fixed_ioport_writew, },
-    { 0, 16, 1, .write = platform_fixed_ioport_writeb, },
-    { 0, 16, 2, .read = platform_fixed_ioport_readw, },
-    { 0, 16, 1, .read = platform_fixed_ioport_readb, },
-    PORTIO_END_OF_LIST()
-};
+static uint64_t platform_fixed_ioport_read(void *opaque,
+                                           hwaddr addr,
+                                           unsigned size)
+{
+    switch (size) {
+    case 1:
+        return platform_fixed_ioport_readb(opaque, addr);
+    case 2:
+        return platform_fixed_ioport_readw(opaque, addr);
+    default:
+        return -1;
+    }
+}
+
+static void platform_fixed_ioport_write(void *opaque, hwaddr addr,
+
+                                        uint64_t val, unsigned size)
+{
+    switch (size) {
+    case 1:
+        platform_fixed_ioport_writeb(opaque, addr, val);
+        break;
+    case 2:
+        platform_fixed_ioport_writew(opaque, addr, val);
+        break;
+    case 4:
+        platform_fixed_ioport_writel(opaque, addr, val);
+        break;
+    }
+}
+
 
 static const MemoryRegionOps platform_fixed_io_ops = {
-    .old_portio = xen_platform_ioport,
-    .endianness = DEVICE_NATIVE_ENDIAN,
+    .read = platform_fixed_ioport_read,
+    .write = platform_fixed_ioport_write,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 4,
+    },
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static void platform_fixed_ioport_init(PCIXenPlatformState* s)
commit 360d613e528df695fc0ac89f4069d8877d39ebea
Author: Alexander Graf <agraf at suse.de>
Date:   Mon Oct 8 13:44:24 2012 +0200

    vmport: convert PIO to new memory api read/write
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/vmport.c b/hw/vmport.c
index a4f52ee..3ab3a14 100644
--- a/hw/vmport.c
+++ b/hw/vmport.c
@@ -54,7 +54,8 @@ void vmport_register(unsigned char command, IOPortReadFunc *func, void *opaque)
     port_state->opaque[command] = opaque;
 }
 
-static uint32_t vmport_ioport_read(void *opaque, uint32_t addr)
+static uint64_t vmport_ioport_read(void *opaque, hwaddr addr,
+                                   unsigned size)
 {
     VMPortState *s = opaque;
     CPUX86State *env = cpu_single_env;
@@ -81,11 +82,12 @@ static uint32_t vmport_ioport_read(void *opaque, uint32_t addr)
     return s->func[command](s->opaque[command], addr);
 }
 
-static void vmport_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+static void vmport_ioport_write(void *opaque, hwaddr addr,
+                                uint64_t val, unsigned size)
 {
     CPUX86State *env = cpu_single_env;
 
-    env->regs[R_EAX] = vmport_ioport_read(opaque, addr);
+    env->regs[R_EAX] = vmport_ioport_read(opaque, addr, 4);
 }
 
 static uint32_t vmport_cmd_get_version(void *opaque, uint32_t addr)
@@ -121,13 +123,14 @@ void vmmouse_set_data(const uint32_t *data)
     env->regs[R_ESI] = data[4]; env->regs[R_EDI] = data[5];
 }
 
-static const MemoryRegionPortio vmport_portio[] = {
-    {0, 1, 4, .read = vmport_ioport_read, .write = vmport_ioport_write },
-    PORTIO_END_OF_LIST(),
-};
-
 static const MemoryRegionOps vmport_ops = {
-    .old_portio = vmport_portio
+    .read = vmport_ioport_read,
+    .write = vmport_ioport_write,
+    .impl = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static int vmport_initfn(ISADevice *dev)
commit 5ec3a23e6c8303c51a244411c57f2456a24799c9
Author: Alexander Graf <agraf at suse.de>
Date:   Mon Oct 8 13:40:29 2012 +0200

    serial: convert PIO to new memory api read/write
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/serial.c b/hw/serial.c
index ae84b22..60283ea 100644
--- a/hw/serial.c
+++ b/hw/serial.c
@@ -26,6 +26,7 @@
 #include "serial.h"
 #include "qemu-char.h"
 #include "qemu-timer.h"
+#include "exec-memory.h"
 
 //#define DEBUG_SERIAL
 
@@ -305,7 +306,8 @@ static void serial_xmit(void *opaque)
 }
 
 
-static void serial_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+static void serial_ioport_write(void *opaque, hwaddr addr, uint64_t val,
+                                unsigned size)
 {
     SerialState *s = opaque;
 
@@ -451,7 +453,7 @@ static void serial_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     }
 }
 
-static uint32_t serial_ioport_read(void *opaque, uint32_t addr)
+static uint64_t serial_ioport_read(void *opaque, hwaddr addr, unsigned size)
 {
     SerialState *s = opaque;
     uint32_t ret;
@@ -620,7 +622,7 @@ static int serial_post_load(void *opaque, int version_id)
         s->fcr_vmstate = 0;
     }
     /* Initialize fcr via setter to perform essential side-effects */
-    serial_ioport_write(s, 0x02, s->fcr_vmstate);
+    serial_ioport_write(s, 0x02, s->fcr_vmstate, 1);
     serial_update_parameters(s);
     return 0;
 }
@@ -705,13 +707,14 @@ void serial_set_frequency(SerialState *s, uint32_t frequency)
     serial_update_parameters(s);
 }
 
-static const MemoryRegionPortio serial_portio[] = {
-    { 0, 8, 1, .read = serial_ioport_read, .write = serial_ioport_write },
-    PORTIO_END_OF_LIST()
-};
-
 const MemoryRegionOps serial_io_ops = {
-    .old_portio = serial_portio
+    .read = serial_ioport_read,
+    .write = serial_ioport_write,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 1,
+    },
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 SerialState *serial_init(int base, qemu_irq irq, int baudbase,
@@ -728,8 +731,9 @@ SerialState *serial_init(int base, qemu_irq irq, int baudbase,
 
     vmstate_register(NULL, base, &vmstate_serial, s);
 
-    register_ioport_write(base, 8, 1, serial_ioport_write, s);
-    register_ioport_read(base, 8, 1, serial_ioport_read, s);
+    memory_region_init_io(&s->io, &serial_io_ops, s, "serial", 8);
+    memory_region_add_subregion(get_system_io(), base, &s->io);
+
     return s;
 }
 
@@ -738,7 +742,7 @@ static uint64_t serial_mm_read(void *opaque, hwaddr addr,
                                unsigned size)
 {
     SerialState *s = opaque;
-    return serial_ioport_read(s, addr >> s->it_shift);
+    return serial_ioport_read(s, addr >> s->it_shift, 1);
 }
 
 static void serial_mm_write(void *opaque, hwaddr addr,
@@ -746,7 +750,7 @@ static void serial_mm_write(void *opaque, hwaddr addr,
 {
     SerialState *s = opaque;
     value &= ~0u >> (32 - (size * 8));
-    serial_ioport_write(s, addr >> s->it_shift, value);
+    serial_ioport_write(s, addr >> s->it_shift, value, 1);
 }
 
 static const MemoryRegionOps serial_mm_ops[3] = {
commit 1bebb0ad17dc56f66ad09f35d0ab8a3a6357f548
Author: Alexander Graf <agraf at suse.de>
Date:   Mon Oct 8 13:35:24 2012 +0200

    rtl8139: convert PIO to new memory api read/write
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/rtl8139.c b/hw/rtl8139.c
index 10ced8b..e3aa8bf 100644
--- a/hw/rtl8139.c
+++ b/hw/rtl8139.c
@@ -3187,38 +3187,6 @@ static uint32_t rtl8139_io_readl(void *opaque, uint8_t addr)
 
 /* */
 
-static void rtl8139_ioport_writeb(void *opaque, uint32_t addr, uint32_t val)
-{
-    rtl8139_io_writeb(opaque, addr & 0xFF, val);
-}
-
-static void rtl8139_ioport_writew(void *opaque, uint32_t addr, uint32_t val)
-{
-    rtl8139_io_writew(opaque, addr & 0xFF, val);
-}
-
-static void rtl8139_ioport_writel(void *opaque, uint32_t addr, uint32_t val)
-{
-    rtl8139_io_writel(opaque, addr & 0xFF, val);
-}
-
-static uint32_t rtl8139_ioport_readb(void *opaque, uint32_t addr)
-{
-    return rtl8139_io_readb(opaque, addr & 0xFF);
-}
-
-static uint32_t rtl8139_ioport_readw(void *opaque, uint32_t addr)
-{
-    return rtl8139_io_readw(opaque, addr & 0xFF);
-}
-
-static uint32_t rtl8139_ioport_readl(void *opaque, uint32_t addr)
-{
-    return rtl8139_io_readl(opaque, addr & 0xFF);
-}
-
-/* */
-
 static void rtl8139_mmio_writeb(void *opaque, hwaddr addr, uint32_t val)
 {
     rtl8139_io_writeb(opaque, addr & 0xFF, val);
@@ -3386,18 +3354,44 @@ static const VMStateDescription vmstate_rtl8139 = {
 /***********************************************************/
 /* PCI RTL8139 definitions */
 
-static const MemoryRegionPortio rtl8139_portio[] = {
-    { 0, 0x100, 1, .read = rtl8139_ioport_readb, },
-    { 0, 0x100, 1, .write = rtl8139_ioport_writeb, },
-    { 0, 0x100, 2, .read = rtl8139_ioport_readw, },
-    { 0, 0x100, 2, .write = rtl8139_ioport_writew, },
-    { 0, 0x100, 4, .read = rtl8139_ioport_readl, },
-    { 0, 0x100, 4, .write = rtl8139_ioport_writel, },
-    PORTIO_END_OF_LIST()
-};
+static void rtl8139_ioport_write(void *opaque, hwaddr addr,
+                                 uint64_t val, unsigned size)
+{
+    switch (size) {
+    case 1:
+        rtl8139_io_writeb(opaque, addr, val);
+        break;
+    case 2:
+        rtl8139_io_writew(opaque, addr, val);
+        break;
+    case 4:
+        rtl8139_io_writel(opaque, addr, val);
+        break;
+    }
+}
+
+static uint64_t rtl8139_ioport_read(void *opaque, hwaddr addr,
+                                    unsigned size)
+{
+    switch (size) {
+    case 1:
+        return rtl8139_io_readb(opaque, addr);
+    case 2:
+        return rtl8139_io_readw(opaque, addr);
+    case 4:
+        return rtl8139_io_readl(opaque, addr);
+    }
+
+    return -1;
+}
 
 static const MemoryRegionOps rtl8139_io_ops = {
-    .old_portio = rtl8139_portio,
+    .read = rtl8139_ioport_read,
+    .write = rtl8139_ioport_write,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 4,
+    },
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
commit d540bfe0e730ac0b18f58ed5a8355c8ee014f35d
Author: Alexander Graf <agraf at suse.de>
Date:   Mon Oct 8 13:30:08 2012 +0200

    pckbd: convert PIO to new memory api read/write
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/pckbd.c b/hw/pckbd.c
index 000c7f0..5bb3e0a 100644
--- a/hw/pckbd.c
+++ b/hw/pckbd.c
@@ -194,7 +194,8 @@ static void kbd_update_aux_irq(void *opaque, int level)
     kbd_update_irq(s);
 }
 
-static uint32_t kbd_read_status(void *opaque, uint32_t addr)
+static uint64_t kbd_read_status(void *opaque, hwaddr addr,
+                                unsigned size)
 {
     KBDState *s = opaque;
     int val;
@@ -223,7 +224,8 @@ static void outport_write(KBDState *s, uint32_t val)
     }
 }
 
-static void kbd_write_command(void *opaque, uint32_t addr, uint32_t val)
+static void kbd_write_command(void *opaque, hwaddr addr,
+                              uint64_t val, unsigned size)
 {
     KBDState *s = opaque;
 
@@ -303,12 +305,13 @@ static void kbd_write_command(void *opaque, uint32_t addr, uint32_t val)
         /* ignore that */
         break;
     default:
-        fprintf(stderr, "qemu: unsupported keyboard cmd=0x%02x\n", val);
+        fprintf(stderr, "qemu: unsupported keyboard cmd=0x%02x\n", (int)val);
         break;
     }
 }
 
-static uint32_t kbd_read_data(void *opaque, uint32_t addr)
+static uint64_t kbd_read_data(void *opaque, hwaddr addr,
+                              unsigned size)
 {
     KBDState *s = opaque;
     uint32_t val;
@@ -322,7 +325,8 @@ static uint32_t kbd_read_data(void *opaque, uint32_t addr)
     return val;
 }
 
-static void kbd_write_data(void *opaque, uint32_t addr, uint32_t val)
+static void kbd_write_data(void *opaque, hwaddr addr,
+                           uint64_t val, unsigned size)
 {
     KBDState *s = opaque;
 
@@ -385,9 +389,9 @@ static uint32_t kbd_mm_readb (void *opaque, hwaddr addr)
     KBDState *s = opaque;
 
     if (addr & s->mask)
-        return kbd_read_status(s, 0) & 0xff;
+        return kbd_read_status(s, 0, 1) & 0xff;
     else
-        return kbd_read_data(s, 0) & 0xff;
+        return kbd_read_data(s, 0, 1) & 0xff;
 }
 
 static void kbd_mm_writeb (void *opaque, hwaddr addr, uint32_t value)
@@ -395,9 +399,9 @@ static void kbd_mm_writeb (void *opaque, hwaddr addr, uint32_t value)
     KBDState *s = opaque;
 
     if (addr & s->mask)
-        kbd_write_command(s, 0, value & 0xff);
+        kbd_write_command(s, 0, value & 0xff, 1);
     else
-        kbd_write_data(s, 0, value & 0xff);
+        kbd_write_data(s, 0, value & 0xff, 1);
 }
 
 static const MemoryRegionOps i8042_mmio_ops = {
@@ -459,22 +463,24 @@ static const VMStateDescription vmstate_kbd_isa = {
     }
 };
 
-static const MemoryRegionPortio i8042_data_portio[] = {
-    { 0, 1, 1, .read = kbd_read_data, .write = kbd_write_data },
-    PORTIO_END_OF_LIST()
-};
-
-static const MemoryRegionPortio i8042_cmd_portio[] = {
-    { 0, 1, 1, .read = kbd_read_status, .write = kbd_write_command },
-    PORTIO_END_OF_LIST()
-};
-
 static const MemoryRegionOps i8042_data_ops = {
-    .old_portio = i8042_data_portio
+    .read = kbd_read_data,
+    .write = kbd_write_data,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 1,
+    },
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static const MemoryRegionOps i8042_cmd_ops = {
-    .old_portio = i8042_cmd_portio
+    .read = kbd_read_status,
+    .write = kbd_write_command,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 1,
+    },
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static int i8042_initfn(ISADevice *dev)
commit 93ef41928299efbd7879bc63f329893e65e6e1de
Author: Alexander Graf <agraf at suse.de>
Date:   Mon Oct 8 13:24:52 2012 +0200

    pc port92: convert PIO to new memory api read/write
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/pc.c b/hw/pc.c
index 16de04c..a02b397 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -421,7 +421,8 @@ typedef struct Port92State {
     qemu_irq *a20_out;
 } Port92State;
 
-static void port92_write(void *opaque, uint32_t addr, uint32_t val)
+static void port92_write(void *opaque, hwaddr addr, uint64_t val,
+                         unsigned size)
 {
     Port92State *s = opaque;
 
@@ -433,7 +434,8 @@ static void port92_write(void *opaque, uint32_t addr, uint32_t val)
     }
 }
 
-static uint32_t port92_read(void *opaque, uint32_t addr)
+static uint64_t port92_read(void *opaque, hwaddr addr,
+                            unsigned size)
 {
     Port92State *s = opaque;
     uint32_t ret;
@@ -468,13 +470,14 @@ static void port92_reset(DeviceState *d)
     s->outport &= ~1;
 }
 
-static const MemoryRegionPortio port92_portio[] = {
-    { 0, 1, 1, .read = port92_read, .write = port92_write },
-    PORTIO_END_OF_LIST(),
-};
-
 static const MemoryRegionOps port92_ops = {
-    .old_portio = port92_portio
+    .read = port92_read,
+    .write = port92_write,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 1,
+    },
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static int port92_initfn(ISADevice *dev)
commit 0da8c842b73924fb54ed75fb8850be5f7e978df9
Author: Alexander Graf <agraf at suse.de>
Date:   Mon Oct 8 13:22:28 2012 +0200

    mc146818rtc: convert PIO to new memory api read/write
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/mc146818rtc.c b/hw/mc146818rtc.c
index 332a77d..98839f2 100644
--- a/hw/mc146818rtc.c
+++ b/hw/mc146818rtc.c
@@ -383,7 +383,8 @@ static void rtc_update_timer(void *opaque)
     check_update_timer(s);
 }
 
-static void cmos_ioport_write(void *opaque, uint32_t addr, uint32_t data)
+static void cmos_ioport_write(void *opaque, hwaddr addr,
+                              uint64_t data, unsigned size)
 {
     RTCState *s = opaque;
 
@@ -595,7 +596,8 @@ static int update_in_progress(RTCState *s)
     return 0;
 }
 
-static uint32_t cmos_ioport_read(void *opaque, uint32_t addr)
+static uint64_t cmos_ioport_read(void *opaque, hwaddr addr,
+                                 unsigned size)
 {
     RTCState *s = opaque;
     int ret;
@@ -769,13 +771,14 @@ static void rtc_reset(void *opaque)
 #endif
 }
 
-static const MemoryRegionPortio cmos_portio[] = {
-    {0, 2, 1, .read = cmos_ioport_read, .write = cmos_ioport_write },
-    PORTIO_END_OF_LIST(),
-};
-
 static const MemoryRegionOps cmos_ops = {
-    .old_portio = cmos_portio
+    .read = cmos_ioport_read,
+    .write = cmos_ioport_write,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 1,
+    },
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static void rtc_get_date(Object *obj, Visitor *v, void *opaque,
commit 087bd055acf29e838908a9e6bfad73f12c49bc4c
Author: Alexander Graf <agraf at suse.de>
Date:   Mon Oct 8 13:19:48 2012 +0200

    m48t59: convert PIO to new memory api read/write
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/m48t59.c b/hw/m48t59.c
index 9eb1a09..9e8e692 100644
--- a/hw/m48t59.c
+++ b/hw/m48t59.c
@@ -27,6 +27,7 @@
 #include "sysemu.h"
 #include "sysbus.h"
 #include "isa.h"
+#include "exec-memory.h"
 
 //#define DEBUG_NVRAM
 
@@ -80,6 +81,7 @@ typedef struct M48t59ISAState {
 typedef struct M48t59SysBusState {
     SysBusDevice busdev;
     M48t59State state;
+    MemoryRegion io;
 } M48t59SysBusState;
 
 /* Fake timer functions */
@@ -481,7 +483,8 @@ void m48t59_toggle_lock (void *opaque, int lock)
 }
 
 /* IO access to NVRAM */
-static void NVRAM_writeb (void *opaque, uint32_t addr, uint32_t val)
+static void NVRAM_writeb(void *opaque, hwaddr addr, uint64_t val,
+                         unsigned size)
 {
     M48t59State *NVRAM = opaque;
 
@@ -504,7 +507,7 @@ static void NVRAM_writeb (void *opaque, uint32_t addr, uint32_t val)
     }
 }
 
-static uint32_t NVRAM_readb (void *opaque, uint32_t addr)
+static uint64_t NVRAM_readb(void *opaque, hwaddr addr, unsigned size)
 {
     M48t59State *NVRAM = opaque;
     uint32_t retval;
@@ -626,13 +629,14 @@ static void m48t59_reset_sysbus(DeviceState *d)
     m48t59_reset_common(NVRAM);
 }
 
-static const MemoryRegionPortio m48t59_portio[] = {
-    {0, 4, 1, .read = NVRAM_readb, .write = NVRAM_writeb },
-    PORTIO_END_OF_LIST(),
-};
-
 static const MemoryRegionOps m48t59_io_ops = {
-    .old_portio = m48t59_portio,
+    .read = NVRAM_readb,
+    .write = NVRAM_writeb,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 1,
+    },
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 /* Initialisation routine */
@@ -653,9 +657,9 @@ M48t59State *m48t59_init(qemu_irq IRQ, hwaddr mem_base,
     d = FROM_SYSBUS(M48t59SysBusState, s);
     state = &d->state;
     sysbus_connect_irq(s, 0, IRQ);
+    memory_region_init_io(&d->io, &m48t59_io_ops, state, "m48t59", 4);
     if (io_base != 0) {
-        register_ioport_read(io_base, 0x04, 1, NVRAM_readb, state);
-        register_ioport_write(io_base, 0x04, 1, NVRAM_writeb, state);
+        memory_region_add_subregion(get_system_io(), io_base, &d->io);
     }
     if (mem_base != 0) {
         sysbus_mmio_map(s, 0, mem_base);
commit 0505bcdec8228d8de39ab1a02644e71999e7c052
Author: Alexander Graf <agraf at suse.de>
Date:   Mon Oct 8 13:12:31 2012 +0200

    i8254: convert PIO to new memory api read/write
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/i8254.c b/hw/i8254.c
index 77bd5e8..bea5f92 100644
--- a/hw/i8254.c
+++ b/hw/i8254.c
@@ -111,7 +111,8 @@ static void pit_latch_count(PITChannelState *s)
     }
 }
 
-static void pit_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+static void pit_ioport_write(void *opaque, hwaddr addr,
+                             uint64_t val, unsigned size)
 {
     PITCommonState *pit = opaque;
     int channel, access;
@@ -178,7 +179,8 @@ static void pit_ioport_write(void *opaque, uint32_t addr, uint32_t val)
     }
 }
 
-static uint32_t pit_ioport_read(void *opaque, uint32_t addr)
+static uint64_t pit_ioport_read(void *opaque, hwaddr addr,
+                                unsigned size)
 {
     PITCommonState *pit = opaque;
     int ret, count;
@@ -290,14 +292,14 @@ static void pit_irq_control(void *opaque, int n, int enable)
     }
 }
 
-static const MemoryRegionPortio pit_portio[] = {
-    { 0, 4, 1, .write = pit_ioport_write },
-    { 0, 3, 1, .read = pit_ioport_read },
-    PORTIO_END_OF_LIST()
-};
-
 static const MemoryRegionOps pit_ioport_ops = {
-    .old_portio = pit_portio
+    .read = pit_ioport_read,
+    .write = pit_ioport_write,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 1,
+    },
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static void pit_post_load(PITCommonState *s)
commit f3726fd78d79693b73c1ac41cc8ca0f1bb90082b
Author: Alexander Graf <agraf at suse.de>
Date:   Mon Oct 8 13:09:44 2012 +0200

    es1370: convert PIO to new memory api read/write
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/es1370.c b/hw/es1370.c
index e34234c..e0c9729 100644
--- a/hw/es1370.c
+++ b/hw/es1370.c
@@ -908,18 +908,44 @@ static void es1370_adc_callback (void *opaque, int avail)
     es1370_run_channel (s, ADC_CHANNEL, avail);
 }
 
-static const MemoryRegionPortio es1370_portio[] = {
-    { 0, 0x40 * 4, 1, .write = es1370_writeb, },
-    { 0, 0x40 * 2, 2, .write = es1370_writew, },
-    { 0, 0x40, 4, .write = es1370_writel, },
-    { 0, 0x40 * 4, 1, .read = es1370_readb, },
-    { 0, 0x40 * 2, 2, .read = es1370_readw, },
-    { 0, 0x40, 4, .read = es1370_readl, },
-    PORTIO_END_OF_LIST ()
-};
+static uint64_t es1370_read(void *opaque, hwaddr addr,
+                            unsigned size)
+{
+    switch (size) {
+    case 1:
+        return es1370_readb(opaque, addr);
+    case 2:
+        return es1370_readw(opaque, addr);
+    case 4:
+        return es1370_readl(opaque, addr);
+    default:
+        return -1;
+    }
+}
+
+static void es1370_write(void *opaque, hwaddr addr, uint64_t val,
+                      unsigned size)
+{
+    switch (size) {
+    case 1:
+        es1370_writeb(opaque, addr, val);
+        break;
+    case 2:
+        es1370_writew(opaque, addr, val);
+        break;
+    case 4:
+        es1370_writel(opaque, addr, val);
+        break;
+    }
+}
 
 static const MemoryRegionOps es1370_io_ops = {
-    .old_portio = es1370_portio,
+    .read = es1370_read,
+    .write = es1370_write,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 4,
+    },
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
commit df6db5b32ab776a1d23bd32ae75c0e5284a01234
Author: Alexander Graf <agraf at suse.de>
Date:   Mon Oct 8 13:02:30 2012 +0200

    virtio-pci: convert PIO to new memory api read/write
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index c7f20c3..9603150 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -374,79 +374,39 @@ static uint32_t virtio_ioport_read(VirtIOPCIProxy *proxy, uint32_t addr)
     return ret;
 }
 
-static uint32_t virtio_pci_config_readb(void *opaque, uint32_t addr)
-{
-    VirtIOPCIProxy *proxy = opaque;
-    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
-    if (addr < config)
-        return virtio_ioport_read(proxy, addr);
-    addr -= config;
-    return virtio_config_readb(proxy->vdev, addr);
-}
-
-static uint32_t virtio_pci_config_readw(void *opaque, uint32_t addr)
-{
-    VirtIOPCIProxy *proxy = opaque;
-    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
-    uint16_t val;
-    if (addr < config)
-        return virtio_ioport_read(proxy, addr);
-    addr -= config;
-    val = virtio_config_readw(proxy->vdev, addr);
-    if (virtio_is_big_endian()) {
-        /*
-         * virtio is odd, ioports are LE but config space is target native
-         * endian. However, in qemu, all PIO is LE, so we need to re-swap
-         * on BE targets
-         */
-        val = bswap16(val);
-    }
-    return val;
-}
-
-static uint32_t virtio_pci_config_readl(void *opaque, uint32_t addr)
-{
-    VirtIOPCIProxy *proxy = opaque;
-    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
-    uint32_t val;
-    if (addr < config)
-        return virtio_ioport_read(proxy, addr);
-    addr -= config;
-    val = virtio_config_readl(proxy->vdev, addr);
-    if (virtio_is_big_endian()) {
-        val = bswap32(val);
-    }
-    return val;
-}
-
-static void virtio_pci_config_writeb(void *opaque, uint32_t addr, uint32_t val)
+static uint64_t virtio_pci_config_read(void *opaque, hwaddr addr,
+                                       unsigned size)
 {
     VirtIOPCIProxy *proxy = opaque;
     uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
+    uint64_t val = 0;
     if (addr < config) {
-        virtio_ioport_write(proxy, addr, val);
-        return;
+        return virtio_ioport_read(proxy, addr);
     }
     addr -= config;
-    virtio_config_writeb(proxy->vdev, addr, val);
-}
 
-static void virtio_pci_config_writew(void *opaque, uint32_t addr, uint32_t val)
-{
-    VirtIOPCIProxy *proxy = opaque;
-    uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
-    if (addr < config) {
-        virtio_ioport_write(proxy, addr, val);
-        return;
-    }
-    addr -= config;
-    if (virtio_is_big_endian()) {
-        val = bswap16(val);
+    switch (size) {
+    case 1:
+        val = virtio_config_readb(proxy->vdev, addr);
+        break;
+    case 2:
+        val = virtio_config_readw(proxy->vdev, addr);
+        if (virtio_is_big_endian()) {
+            val = bswap16(val);
+        }
+        break;
+    case 4:
+        val = virtio_config_readl(proxy->vdev, addr);
+        if (virtio_is_big_endian()) {
+            val = bswap32(val);
+        }
+        break;
     }
-    virtio_config_writew(proxy->vdev, addr, val);
+    return val;
 }
 
-static void virtio_pci_config_writel(void *opaque, uint32_t addr, uint32_t val)
+static void virtio_pci_config_write(void *opaque, hwaddr addr,
+                                    uint64_t val, unsigned size)
 {
     VirtIOPCIProxy *proxy = opaque;
     uint32_t config = VIRTIO_PCI_CONFIG(&proxy->pci_dev);
@@ -455,24 +415,36 @@ static void virtio_pci_config_writel(void *opaque, uint32_t addr, uint32_t val)
         return;
     }
     addr -= config;
-    if (virtio_is_big_endian()) {
-        val = bswap32(val);
+    /*
+     * Virtio-PCI is odd. Ioports are LE but config space is target native
+     * endian.
+     */
+    switch (size) {
+    case 1:
+        virtio_config_writeb(proxy->vdev, addr, val);
+        break;
+    case 2:
+        if (virtio_is_big_endian()) {
+            val = bswap16(val);
+        }
+        virtio_config_writew(proxy->vdev, addr, val);
+        break;
+    case 4:
+        if (virtio_is_big_endian()) {
+            val = bswap32(val);
+        }
+        virtio_config_writel(proxy->vdev, addr, val);
+        break;
     }
-    virtio_config_writel(proxy->vdev, addr, val);
 }
 
-static const MemoryRegionPortio virtio_portio[] = {
-    { 0, 0x10000, 1, .write = virtio_pci_config_writeb, },
-    { 0, 0x10000, 2, .write = virtio_pci_config_writew, },
-    { 0, 0x10000, 4, .write = virtio_pci_config_writel, },
-    { 0, 0x10000, 1, .read = virtio_pci_config_readb, },
-    { 0, 0x10000, 2, .read = virtio_pci_config_readw, },
-    { 0, 0x10000, 4, .read = virtio_pci_config_readl, },
-    PORTIO_END_OF_LIST()
-};
-
 static const MemoryRegionOps virtio_pci_config_ops = {
-    .old_portio = virtio_portio,
+    .read = virtio_pci_config_read,
+    .write = virtio_pci_config_write,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 4,
+    },
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
commit d6a6d362aa291686d3111aed59ab57badaaef463
Author: Alexander Graf <agraf at suse.de>
Date:   Mon Oct 8 13:02:20 2012 +0200

    ac97: convert PIO to new memory api read/write
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ac97.c b/hw/ac97.c
index 0f561fa..ce6a1dc 100644
--- a/hw/ac97.c
+++ b/hw/ac97.c
@@ -1226,32 +1226,101 @@ static const VMStateDescription vmstate_ac97 = {
     }
 };
 
-static const MemoryRegionPortio nam_portio[] = {
-    { 0, 256 * 1, 1, .read = nam_readb, },
-    { 0, 256 * 2, 2, .read = nam_readw, },
-    { 0, 256 * 4, 4, .read = nam_readl, },
-    { 0, 256 * 1, 1, .write = nam_writeb, },
-    { 0, 256 * 2, 2, .write = nam_writew, },
-    { 0, 256 * 4, 4, .write = nam_writel, },
-    PORTIO_END_OF_LIST (),
-};
+static uint64_t nam_read(void *opaque, hwaddr addr, unsigned size)
+{
+    if ((addr / size) > 256) {
+        return -1;
+    }
+
+    switch (size) {
+    case 1:
+        return nam_readb(opaque, addr);
+    case 2:
+        return nam_readw(opaque, addr);
+    case 4:
+        return nam_readl(opaque, addr);
+    default:
+        return -1;
+    }
+}
+
+static void nam_write(void *opaque, hwaddr addr, uint64_t val,
+                      unsigned size)
+{
+    if ((addr / size) > 256) {
+        return;
+    }
+
+    switch (size) {
+    case 1:
+        nam_writeb(opaque, addr, val);
+        break;
+    case 2:
+        nam_writew(opaque, addr, val);
+        break;
+    case 4:
+        nam_writel(opaque, addr, val);
+        break;
+    }
+}
 
 static const MemoryRegionOps ac97_io_nam_ops = {
-    .old_portio = nam_portio,
+    .read = nam_read,
+    .write = nam_write,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 4,
+    },
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
-static const MemoryRegionPortio nabm_portio[] = {
-    { 0, 64 * 1, 1, .read = nabm_readb, },
-    { 0, 64 * 2, 2, .read = nabm_readw, },
-    { 0, 64 * 4, 4, .read = nabm_readl, },
-    { 0, 64 * 1, 1, .write = nabm_writeb, },
-    { 0, 64 * 2, 2, .write = nabm_writew, },
-    { 0, 64 * 4, 4, .write = nabm_writel, },
-    PORTIO_END_OF_LIST ()
-};
+static uint64_t nabm_read(void *opaque, hwaddr addr, unsigned size)
+{
+    if ((addr / size) > 64) {
+        return -1;
+    }
+
+    switch (size) {
+    case 1:
+        return nabm_readb(opaque, addr);
+    case 2:
+        return nabm_readw(opaque, addr);
+    case 4:
+        return nabm_readl(opaque, addr);
+    default:
+        return -1;
+    }
+}
+
+static void nabm_write(void *opaque, hwaddr addr, uint64_t val,
+                      unsigned size)
+{
+    if ((addr / size) > 64) {
+        return;
+    }
+
+    switch (size) {
+    case 1:
+        nabm_writeb(opaque, addr, val);
+        break;
+    case 2:
+        nabm_writew(opaque, addr, val);
+        break;
+    case 4:
+        nabm_writel(opaque, addr, val);
+        break;
+    }
+}
+
 
 static const MemoryRegionOps ac97_io_nabm_ops = {
-    .old_portio = nabm_portio,
+    .read = nabm_read,
+    .write = nabm_write,
+    .impl = {
+        .min_access_size = 1,
+        .max_access_size = 4,
+    },
+    .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
 static void ac97_on_reset (void *opaque)
commit 74d042e5cec78090194669a8a4417c0a49d46a3d
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Mon Oct 8 18:17:39 2012 +0000

    pseries: Implement qemu initiated shutdowns using EPOW events
    
    At present, using 'system_powerdown' from the monitor or otherwise
    instructing qemu to (cleanly) shut down a pseries guest will not work,
    because we did not have a method of signalling the shutdown request to the
    guest.
    
    PAPR does include a usable mechanism for this, though it is rather more
    involved than the equivalent on x86.  This involves sending an EPOW
    (Environmental and POwer Warning) event through the PAPR event and error
    logging mechanism, which also has a number of other functions.
    
    This patch implements just enough of the event/error logging functionality
    to be able to send a shutdown event to the guest.  At least with modern
    guest kernels and a userspace that is up and running, this means that
    system_powerdown from the qemu monitor should now work correctly on pseries
    guests.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index 951e407..8fe2123 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -11,6 +11,7 @@ obj-y += ppc_newworld.o
 obj-$(CONFIG_PSERIES) += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o
 obj-$(CONFIG_PSERIES) += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o
 obj-$(CONFIG_PSERIES) += spapr_pci.o pci-hotplug.o spapr_iommu.o
+obj-$(CONFIG_PSERIES) += spapr_events.o
 # PowerPC 4xx boards
 obj-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
 obj-y += ppc440_bamboo.o
diff --git a/hw/spapr.c b/hw/spapr.c
index 1587bc3..8d0ad3c 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -232,7 +232,8 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
                                    hwaddr initrd_size,
                                    hwaddr kernel_size,
                                    const char *boot_device,
-                                   const char *kernel_cmdline)
+                                   const char *kernel_cmdline,
+                                   uint32_t epow_irq)
 {
     void *fdt;
     CPUPPCState *env;
@@ -403,6 +404,8 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
     _FDT((fdt_property(fdt, "ibm,associativity-reference-points",
         refpoints, sizeof(refpoints))));
 
+    _FDT((fdt_property_cell(fdt, "rtas-error-log-max", RTAS_ERROR_LOG_MAX)));
+
     _FDT((fdt_end_node(fdt)));
 
     /* interrupt controller */
@@ -433,6 +436,9 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
 
     _FDT((fdt_end_node(fdt)));
 
+    /* event-sources */
+    spapr_events_fdt_skel(fdt, epow_irq);
+
     _FDT((fdt_end_node(fdt))); /* close root node */
     _FDT((fdt_finish(fdt)));
 
@@ -795,6 +801,9 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
     spapr->icp = xics_system_init(XICS_IRQS);
     spapr->next_irq = 16;
 
+    /* Set up EPOW events infrastructure */
+    spapr_events_init(spapr);
+
     /* Set up IOMMU */
     spapr_iommu_init();
 
@@ -903,7 +912,8 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
     spapr->fdt_skel = spapr_create_fdt_skel(cpu_model,
                                             initrd_base, initrd_size,
                                             kernel_size,
-                                            boot_device, kernel_cmdline);
+                                            boot_device, kernel_cmdline,
+                                            spapr->epow_irq);
     assert(spapr->fdt_skel != NULL);
 }
 
diff --git a/hw/spapr.h b/hw/spapr.h
index 8ee69bd..51c709e 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -26,6 +26,9 @@ typedef struct sPAPREnvironment {
     int rtc_offset;
     char *cpu_model;
     bool has_graphics;
+
+    uint32_t epow_irq;
+    Notifier epow_notifier;
 } sPAPREnvironment;
 
 #define H_SUCCESS         0
@@ -335,7 +338,12 @@ typedef struct sPAPRTCE {
 #define SPAPR_VIO_BASE_LIOBN    0x00000000
 #define SPAPR_PCI_BASE_LIOBN    0x80000000
 
+#define RTAS_ERROR_LOG_MAX      2048
+
+
 void spapr_iommu_init(void);
+void spapr_events_init(sPAPREnvironment *spapr);
+void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq);
 DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size);
 void spapr_tce_free(DMAContext *dma);
 void spapr_tce_reset(DMAContext *dma);
diff --git a/hw/spapr_events.c b/hw/spapr_events.c
new file mode 100644
index 0000000..18ccd4a
--- /dev/null
+++ b/hw/spapr_events.c
@@ -0,0 +1,321 @@
+/*
+ * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System Emulator
+ *
+ * RTAS events handling
+ *
+ * Copyright (c) 2012 David Gibson, IBM Corporation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ */
+#include "cpu.h"
+#include "sysemu.h"
+#include "qemu-char.h"
+#include "hw/qdev.h"
+#include "device_tree.h"
+
+#include "hw/spapr.h"
+#include "hw/spapr_vio.h"
+
+#include <libfdt.h>
+
+struct rtas_error_log {
+    uint32_t summary;
+#define RTAS_LOG_VERSION_MASK                   0xff000000
+#define   RTAS_LOG_VERSION_6                    0x06000000
+#define RTAS_LOG_SEVERITY_MASK                  0x00e00000
+#define   RTAS_LOG_SEVERITY_ALREADY_REPORTED    0x00c00000
+#define   RTAS_LOG_SEVERITY_FATAL               0x00a00000
+#define   RTAS_LOG_SEVERITY_ERROR               0x00800000
+#define   RTAS_LOG_SEVERITY_ERROR_SYNC          0x00600000
+#define   RTAS_LOG_SEVERITY_WARNING             0x00400000
+#define   RTAS_LOG_SEVERITY_EVENT               0x00200000
+#define   RTAS_LOG_SEVERITY_NO_ERROR            0x00000000
+#define RTAS_LOG_DISPOSITION_MASK               0x00180000
+#define   RTAS_LOG_DISPOSITION_FULLY_RECOVERED  0x00000000
+#define   RTAS_LOG_DISPOSITION_LIMITED_RECOVERY 0x00080000
+#define   RTAS_LOG_DISPOSITION_NOT_RECOVERED    0x00100000
+#define RTAS_LOG_OPTIONAL_PART_PRESENT          0x00040000
+#define RTAS_LOG_INITIATOR_MASK                 0x0000f000
+#define   RTAS_LOG_INITIATOR_UNKNOWN            0x00000000
+#define   RTAS_LOG_INITIATOR_CPU                0x00001000
+#define   RTAS_LOG_INITIATOR_PCI                0x00002000
+#define   RTAS_LOG_INITIATOR_MEMORY             0x00004000
+#define   RTAS_LOG_INITIATOR_HOTPLUG            0x00006000
+#define RTAS_LOG_TARGET_MASK                    0x00000f00
+#define   RTAS_LOG_TARGET_UNKNOWN               0x00000000
+#define   RTAS_LOG_TARGET_CPU                   0x00000100
+#define   RTAS_LOG_TARGET_PCI                   0x00000200
+#define   RTAS_LOG_TARGET_MEMORY                0x00000400
+#define   RTAS_LOG_TARGET_HOTPLUG               0x00000600
+#define RTAS_LOG_TYPE_MASK                      0x000000ff
+#define   RTAS_LOG_TYPE_OTHER                   0x00000000
+#define   RTAS_LOG_TYPE_RETRY                   0x00000001
+#define   RTAS_LOG_TYPE_TCE_ERR                 0x00000002
+#define   RTAS_LOG_TYPE_INTERN_DEV_FAIL         0x00000003
+#define   RTAS_LOG_TYPE_TIMEOUT                 0x00000004
+#define   RTAS_LOG_TYPE_DATA_PARITY             0x00000005
+#define   RTAS_LOG_TYPE_ADDR_PARITY             0x00000006
+#define   RTAS_LOG_TYPE_CACHE_PARITY            0x00000007
+#define   RTAS_LOG_TYPE_ADDR_INVALID            0x00000008
+#define   RTAS_LOG_TYPE_ECC_UNCORR              0x00000009
+#define   RTAS_LOG_TYPE_ECC_CORR                0x0000000a
+#define   RTAS_LOG_TYPE_EPOW                    0x00000040
+    uint32_t extended_length;
+} QEMU_PACKED;
+
+struct rtas_event_log_v6 {
+    uint8_t b0;
+#define RTAS_LOG_V6_B0_VALID                          0x80
+#define RTAS_LOG_V6_B0_UNRECOVERABLE_ERROR            0x40
+#define RTAS_LOG_V6_B0_RECOVERABLE_ERROR              0x20
+#define RTAS_LOG_V6_B0_DEGRADED_OPERATION             0x10
+#define RTAS_LOG_V6_B0_PREDICTIVE_ERROR               0x08
+#define RTAS_LOG_V6_B0_NEW_LOG                        0x04
+#define RTAS_LOG_V6_B0_BIGENDIAN                      0x02
+    uint8_t _resv1;
+    uint8_t b2;
+#define RTAS_LOG_V6_B2_POWERPC_FORMAT                 0x80
+#define RTAS_LOG_V6_B2_LOG_FORMAT_MASK                0x0f
+#define   RTAS_LOG_V6_B2_LOG_FORMAT_PLATFORM_EVENT    0x0e
+    uint8_t _resv2[9];
+    uint32_t company;
+#define RTAS_LOG_V6_COMPANY_IBM                 0x49424d00 /* IBM<null> */
+} QEMU_PACKED;
+
+struct rtas_event_log_v6_section_header {
+    uint16_t section_id;
+    uint16_t section_length;
+    uint8_t section_version;
+    uint8_t section_subtype;
+    uint16_t creator_component_id;
+} QEMU_PACKED;
+
+struct rtas_event_log_v6_maina {
+#define RTAS_LOG_V6_SECTION_ID_MAINA                0x5048 /* PH */
+    struct rtas_event_log_v6_section_header hdr;
+    uint32_t creation_date; /* BCD: YYYYMMDD */
+    uint32_t creation_time; /* BCD: HHMMSS00 */
+    uint8_t _platform1[8];
+    char creator_id;
+    uint8_t _resv1[2];
+    uint8_t section_count;
+    uint8_t _resv2[4];
+    uint8_t _platform2[8];
+    uint32_t plid;
+    uint8_t _platform3[4];
+} QEMU_PACKED;
+
+struct rtas_event_log_v6_mainb {
+#define RTAS_LOG_V6_SECTION_ID_MAINB                0x5548 /* UH */
+    struct rtas_event_log_v6_section_header hdr;
+    uint8_t subsystem_id;
+    uint8_t _platform1;
+    uint8_t event_severity;
+    uint8_t event_subtype;
+    uint8_t _platform2[4];
+    uint8_t _resv1[2];
+    uint16_t action_flags;
+    uint8_t _resv2[4];
+} QEMU_PACKED;
+
+struct rtas_event_log_v6_epow {
+#define RTAS_LOG_V6_SECTION_ID_EPOW                 0x4550 /* EP */
+    struct rtas_event_log_v6_section_header hdr;
+    uint8_t sensor_value;
+#define RTAS_LOG_V6_EPOW_ACTION_RESET                    0
+#define RTAS_LOG_V6_EPOW_ACTION_WARN_COOLING             1
+#define RTAS_LOG_V6_EPOW_ACTION_WARN_POWER               2
+#define RTAS_LOG_V6_EPOW_ACTION_SYSTEM_SHUTDOWN          3
+#define RTAS_LOG_V6_EPOW_ACTION_SYSTEM_HALT              4
+#define RTAS_LOG_V6_EPOW_ACTION_MAIN_ENCLOSURE           5
+#define RTAS_LOG_V6_EPOW_ACTION_POWER_OFF                7
+    uint8_t event_modifier;
+#define RTAS_LOG_V6_EPOW_MODIFIER_NORMAL                 1
+#define RTAS_LOG_V6_EPOW_MODIFIER_ON_UPS                 2
+#define RTAS_LOG_V6_EPOW_MODIFIER_CRITICAL               3
+#define RTAS_LOG_V6_EPOW_MODIFIER_TEMPERATURE            4
+    uint8_t extended_modifier;
+#define RTAS_LOG_V6_EPOW_XMODIFIER_SYSTEM_WIDE           0
+#define RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC    1
+    uint8_t _resv;
+    uint64_t reason_code;
+} QEMU_PACKED;
+
+struct epow_log_full {
+    struct rtas_error_log hdr;
+    struct rtas_event_log_v6 v6hdr;
+    struct rtas_event_log_v6_maina maina;
+    struct rtas_event_log_v6_mainb mainb;
+    struct rtas_event_log_v6_epow epow;
+} QEMU_PACKED;
+
+#define EVENT_MASK_INTERNAL_ERRORS           0x80000000
+#define EVENT_MASK_EPOW                      0x40000000
+#define EVENT_MASK_HOTPLUG                   0x10000000
+#define EVENT_MASK_IO                        0x08000000
+
+#define _FDT(exp) \
+    do { \
+        int ret = (exp);                                           \
+        if (ret < 0) {                                             \
+            fprintf(stderr, "qemu: error creating device tree: %s: %s\n", \
+                    #exp, fdt_strerror(ret));                      \
+            exit(1);                                               \
+        }                                                          \
+    } while (0)
+
+void spapr_events_fdt_skel(void *fdt, uint32_t epow_irq)
+{
+    uint32_t epow_irq_ranges[] = {cpu_to_be32(epow_irq), cpu_to_be32(1)};
+    uint32_t epow_interrupts[] = {cpu_to_be32(epow_irq), 0};
+
+    _FDT((fdt_begin_node(fdt, "event-sources")));
+
+    _FDT((fdt_property(fdt, "interrupt-controller", NULL, 0)));
+    _FDT((fdt_property_cell(fdt, "#interrupt-cells", 2)));
+    _FDT((fdt_property(fdt, "interrupt-ranges",
+                       epow_irq_ranges, sizeof(epow_irq_ranges))));
+
+    _FDT((fdt_begin_node(fdt, "epow-events")));
+    _FDT((fdt_property(fdt, "interrupts",
+                       epow_interrupts, sizeof(epow_interrupts))));
+    _FDT((fdt_end_node(fdt)));
+
+    _FDT((fdt_end_node(fdt)));
+}
+
+static struct epow_log_full *pending_epow;
+static uint32_t next_plid;
+
+static void spapr_powerdown_req(Notifier *n, void *opaque)
+{
+    sPAPREnvironment *spapr = container_of(n, sPAPREnvironment, epow_notifier);
+    struct rtas_error_log *hdr;
+    struct rtas_event_log_v6 *v6hdr;
+    struct rtas_event_log_v6_maina *maina;
+    struct rtas_event_log_v6_mainb *mainb;
+    struct rtas_event_log_v6_epow *epow;
+    struct tm tm;
+    int year;
+
+    if (pending_epow) {
+        /* For now, we just throw away earlier events if two come
+         * along before any are consumed.  This is sufficient for our
+         * powerdown messages, but we'll need more if we do more
+         * general error/event logging */
+        g_free(pending_epow);
+    }
+    pending_epow = g_malloc0(sizeof(*pending_epow));
+    hdr = &pending_epow->hdr;
+    v6hdr = &pending_epow->v6hdr;
+    maina = &pending_epow->maina;
+    mainb = &pending_epow->mainb;
+    epow = &pending_epow->epow;
+
+    hdr->summary = cpu_to_be32(RTAS_LOG_VERSION_6
+                               | RTAS_LOG_SEVERITY_EVENT
+                               | RTAS_LOG_DISPOSITION_NOT_RECOVERED
+                               | RTAS_LOG_OPTIONAL_PART_PRESENT
+                               | RTAS_LOG_TYPE_EPOW);
+    hdr->extended_length = cpu_to_be32(sizeof(*pending_epow)
+                                       - sizeof(pending_epow->hdr));
+
+    v6hdr->b0 = RTAS_LOG_V6_B0_VALID | RTAS_LOG_V6_B0_NEW_LOG
+        | RTAS_LOG_V6_B0_BIGENDIAN;
+    v6hdr->b2 = RTAS_LOG_V6_B2_POWERPC_FORMAT
+        | RTAS_LOG_V6_B2_LOG_FORMAT_PLATFORM_EVENT;
+    v6hdr->company = cpu_to_be32(RTAS_LOG_V6_COMPANY_IBM);
+
+    maina->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINA);
+    maina->hdr.section_length = cpu_to_be16(sizeof(*maina));
+    /* FIXME: section version, subtype and creator id? */
+    qemu_get_timedate(&tm, spapr->rtc_offset);
+    year = tm.tm_year + 1900;
+    maina->creation_date = cpu_to_be32((to_bcd(year / 100) << 24)
+                                       | (to_bcd(year % 100) << 16)
+                                       | (to_bcd(tm.tm_mon + 1) << 8)
+                                       | to_bcd(tm.tm_mday));
+    maina->creation_time = cpu_to_be32((to_bcd(tm.tm_hour) << 24)
+                                       | (to_bcd(tm.tm_min) << 16)
+                                       | (to_bcd(tm.tm_sec) << 8));
+    maina->creator_id = 'H'; /* Hypervisor */
+    maina->section_count = 3; /* Main-A, Main-B and EPOW */
+    maina->plid = next_plid++;
+
+    mainb->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_MAINB);
+    mainb->hdr.section_length = cpu_to_be16(sizeof(*mainb));
+    /* FIXME: section version, subtype and creator id? */
+    mainb->subsystem_id = 0xa0; /* External environment */
+    mainb->event_severity = 0x00; /* Informational / non-error */
+    mainb->event_subtype = 0xd0; /* Normal shutdown */
+
+    epow->hdr.section_id = cpu_to_be16(RTAS_LOG_V6_SECTION_ID_EPOW);
+    epow->hdr.section_length = cpu_to_be16(sizeof(*epow));
+    epow->hdr.section_version = 2; /* includes extended modifier */
+    /* FIXME: section subtype and creator id? */
+    epow->sensor_value = RTAS_LOG_V6_EPOW_ACTION_SYSTEM_SHUTDOWN;
+    epow->event_modifier = RTAS_LOG_V6_EPOW_MODIFIER_NORMAL;
+    epow->extended_modifier = RTAS_LOG_V6_EPOW_XMODIFIER_PARTITION_SPECIFIC;
+
+    qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->epow_irq));
+}
+
+static void check_exception(sPAPREnvironment *spapr,
+                            uint32_t token, uint32_t nargs,
+                            target_ulong args,
+                            uint32_t nret, target_ulong rets)
+{
+    uint32_t mask, buf, len;
+    uint64_t xinfo;
+
+    if ((nargs < 6) || (nargs > 7) || nret != 1) {
+        rtas_st(rets, 0, -3);
+        return;
+    }
+
+    xinfo = rtas_ld(args, 1);
+    mask = rtas_ld(args, 2);
+    buf = rtas_ld(args, 4);
+    len = rtas_ld(args, 5);
+    if (nargs == 7) {
+        xinfo |= (uint64_t)rtas_ld(args, 6) << 32;
+    }
+
+    if ((mask & EVENT_MASK_EPOW) && pending_epow) {
+        if (sizeof(*pending_epow) < len) {
+            len = sizeof(*pending_epow);
+        }
+
+        cpu_physical_memory_write(buf, pending_epow, len);
+        g_free(pending_epow);
+        pending_epow = NULL;
+        rtas_st(rets, 0, 0);
+    } else {
+        rtas_st(rets, 0, 1);
+    }
+}
+
+void spapr_events_init(sPAPREnvironment *spapr)
+{
+    spapr->epow_irq = spapr_allocate_msi(0);
+    spapr->epow_notifier.notify = spapr_powerdown_req;
+    qemu_register_powerdown_notifier(&spapr->epow_notifier);
+    spapr_rtas_register("check-exception", check_exception);
+}
commit 1bfb37d1e077705de14f0f7fdc7b21749dbe4bc9
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Mon Oct 8 18:17:38 2012 +0000

    target-ppc: Rework storage of VPA registration state
    
    With PAPR guests, hypercalls allow registration of the Virtual Processor
    Area (VPA), SLB shadow and dispatch trace log (DTL), each of which allow
    for certain communication between the guest and hypervisor.  Currently, we
    store the addresses of the three areas and the size of the dtl in
    CPUPPCState.
    
    The SLB shadow and DTL are variable sized, with the size being retrieved
    from within the registered memory area at the hypercall time.  This size
    can later be overwritten with other information, however, so we need to
    save the size as of registration time.  We already do this for the DTL,
    but not for the SLB shadow, so this patch fixes that.
    
    In addition, we change the storage of the VPA information to use fixed
    size integer types which will make life easier for syncing this data with
    KVM, which we will need in future.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr_hcall.c b/hw/spapr_hcall.c
index 762493a..621dabd 100644
--- a/hw/spapr_hcall.c
+++ b/hw/spapr_hcall.c
@@ -366,26 +366,26 @@ static target_ulong register_vpa(CPUPPCState *env, target_ulong vpa)
         return H_PARAMETER;
     }
 
-    env->vpa = vpa;
+    env->vpa_addr = vpa;
 
-    tmp = ldub_phys(env->vpa + VPA_SHARED_PROC_OFFSET);
+    tmp = ldub_phys(env->vpa_addr + VPA_SHARED_PROC_OFFSET);
     tmp |= VPA_SHARED_PROC_VAL;
-    stb_phys(env->vpa + VPA_SHARED_PROC_OFFSET, tmp);
+    stb_phys(env->vpa_addr + VPA_SHARED_PROC_OFFSET, tmp);
 
     return H_SUCCESS;
 }
 
 static target_ulong deregister_vpa(CPUPPCState *env, target_ulong vpa)
 {
-    if (env->slb_shadow) {
+    if (env->slb_shadow_addr) {
         return H_RESOURCE;
     }
 
-    if (env->dispatch_trace_log) {
+    if (env->dtl_addr) {
         return H_RESOURCE;
     }
 
-    env->vpa = 0;
+    env->vpa_addr = 0;
     return H_SUCCESS;
 }
 
@@ -407,18 +407,20 @@ static target_ulong register_slb_shadow(CPUPPCState *env, target_ulong addr)
         return H_PARAMETER;
     }
 
-    if (!env->vpa) {
+    if (!env->vpa_addr) {
         return H_RESOURCE;
     }
 
-    env->slb_shadow = addr;
+    env->slb_shadow_addr = addr;
+    env->slb_shadow_size = size;
 
     return H_SUCCESS;
 }
 
 static target_ulong deregister_slb_shadow(CPUPPCState *env, target_ulong addr)
 {
-    env->slb_shadow = 0;
+    env->slb_shadow_addr = 0;
+    env->slb_shadow_size = 0;
     return H_SUCCESS;
 }
 
@@ -437,11 +439,11 @@ static target_ulong register_dtl(CPUPPCState *env, target_ulong addr)
         return H_PARAMETER;
     }
 
-    if (!env->vpa) {
+    if (!env->vpa_addr) {
         return H_RESOURCE;
     }
 
-    env->dispatch_trace_log = addr;
+    env->dtl_addr = addr;
     env->dtl_size = size;
 
     return H_SUCCESS;
@@ -449,7 +451,7 @@ static target_ulong register_dtl(CPUPPCState *env, target_ulong addr)
 
 static target_ulong deregister_dtl(CPUPPCState *env, target_ulong addr)
 {
-    env->dispatch_trace_log = 0;
+    env->dtl_addr = 0;
     env->dtl_size = 0;
 
     return H_SUCCESS;
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 3f114c9..286f42a 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1045,10 +1045,9 @@ struct CPUPPCState {
 #endif
 
 #if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
-    hwaddr vpa;
-    hwaddr slb_shadow;
-    hwaddr dispatch_trace_log;
-    uint32_t dtl_size;
+    hwaddr vpa_addr;
+    hwaddr slb_shadow_addr, slb_shadow_size;
+    hwaddr dtl_addr, dtl_size;
 #endif /* TARGET_PPC64 */
 
     int error_code;
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index ad54985..e63627c 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -10425,9 +10425,10 @@ static void ppc_cpu_reset(CPUState *s)
     env->error_code = 0;
 
 #if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
-    env->vpa = 0;
-    env->slb_shadow = 0;
-    env->dispatch_trace_log = 0;
+    env->vpa_addr = 0;
+    env->slb_shadow_addr = 0;
+    env->slb_shadow_size = 0;
+    env->dtl_addr = 0;
     env->dtl_size = 0;
 #endif /* TARGET_PPC64 */
 
commit c89d52997cf4849a9ee16b5d2cf462d0cd137634
Author: David Gibson <david at gibson.dropbear.id.au>
Date:   Mon Oct 8 18:17:36 2012 +0000

    pseries: Don't allow duplicate registration of hcalls or RTAS calls
    
    Currently the pseries machine code allows a callback to be registered
    for a hypercall number twice, as long as it's the same callback the second
    time.  We don't test for duplicate registrations of RTAS callbacks at all
    so it will effectively be last registratiojn wins.
    
    This was originally done because it was awkward to ensure that the
    registration happened exactly once, but the code has since been
    restructured so that's no longer the case.
    
    Duplicate registration of a hypercall or RTAS call could well suggest
    a duplicate initialization which could cause other problems, so this patch
    makes duplicate registrations a bug, to prevent the old behaviour from
    hiding other bugs.
    
    Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/spapr_hcall.c b/hw/spapr_hcall.c
index 194d9c2..762493a 100644
--- a/hw/spapr_hcall.c
+++ b/hw/spapr_hcall.c
@@ -670,11 +670,10 @@ void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn)
     } else {
         assert((opcode >= KVMPPC_HCALL_BASE) && (opcode <= KVMPPC_HCALL_MAX));
 
-
         slot = &kvmppc_hypercall_table[opcode - KVMPPC_HCALL_BASE];
     }
 
-    assert(!(*slot) || (fn == *slot));
+    assert(!(*slot));
     *slot = fn;
 }
 
diff --git a/hw/spapr_rtas.c b/hw/spapr_rtas.c
index 67da27b..ce76c58 100644
--- a/hw/spapr_rtas.c
+++ b/hw/spapr_rtas.c
@@ -241,6 +241,15 @@ target_ulong spapr_rtas_call(sPAPREnvironment *spapr,
 
 void spapr_rtas_register(const char *name, spapr_rtas_fn fn)
 {
+    int i;
+
+    for (i = 0; i < (rtas_next - rtas_table); i++) {
+        if (strcmp(name, rtas_table[i].name) == 0) {
+            fprintf(stderr, "RTAS call \"%s\" registered twice\n", name);
+            exit(1);
+        }
+    }
+
     assert(rtas_next < (rtas_table + TOKEN_MAX));
 
     rtas_next->name = name;
commit 094b287f0b3b79d1e68df8f3a63cb144ec9cdfb6
Author: zhlcindy at gmail.com <zhlcindy at gmail.com>
Date:   Sun Sep 2 19:25:28 2012 +0000

    Add USB option in machine options
    
    When -usb option is used, global varible usb_enabled is set.
    And all the plaform will create one USB controller according
    to this variable. In fact, global varibles make code hard
    to read.
    
    So this patch is to remove global variable usb_enabled and
    add USB option in machine options. All the plaforms will get
    USB option value from machine options.
    
    USB option of machine options will be set either by:
      * -usb
      * -machine type=pseries,usb=on
    
    Both these ways can work now. They both set USB option in
    machine options. In the future, the first way will be removed.
    
    Signed-off-by: Li Zhang <zhlcindy at linux.vnet.ibm.com>
    Acked-by: Alexander Graf <agraf at suse.de>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/nseries.c b/hw/nseries.c
index 7ada90d..9306aa1 100644
--- a/hw/nseries.c
+++ b/hw/nseries.c
@@ -1334,8 +1334,9 @@ static void n8x0_init(ram_addr_t ram_size, const char *boot_device,
     n8x0_dss_setup(s);
     n8x0_cbus_setup(s);
     n8x0_uart_setup(s);
-    if (usb_enabled)
+    if (usb_enabled(false)) {
         n8x0_usb_setup(s);
+    }
 
     if (kernel_filename) {
         /* Or at the linux loader.  */
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 47ebc1a..c7dd75b 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -267,7 +267,7 @@ static void pc_init1(MemoryRegion *system_memory,
     pc_cmos_init(below_4g_mem_size, above_4g_mem_size, boot_device,
                  floppy, idebus[0], idebus[1], rtc_state);
 
-    if (pci_enabled && usb_enabled) {
+    if (pci_enabled && usb_enabled(false)) {
         pci_create_simple(pci_bus, piix3_devfn + 2, "piix3-usb-uhci");
     }
 
diff --git a/hw/ppc_newworld.c b/hw/ppc_newworld.c
index 15f74f9..664747e 100644
--- a/hw/ppc_newworld.c
+++ b/hw/ppc_newworld.c
@@ -348,10 +348,6 @@ static void ppc_core99_init(QEMUMachineInitArgs *args)
     ide_mem[1] = pmac_ide_init(hd, pic[0x0d], dbdma, 0x16, pic[0x02]);
     ide_mem[2] = pmac_ide_init(&hd[MAX_IDE_DEVS], pic[0x0e], dbdma, 0x1a, pic[0x02]);
 
-    /* cuda also initialize ADB */
-    if (machine_arch == ARCH_MAC99_U3) {
-        usb_enabled = 1;
-    }
     cuda_init(&cuda_mem, pic[0x19]);
 
     adb_kbd_init(&adb_bus);
@@ -360,15 +356,14 @@ static void ppc_core99_init(QEMUMachineInitArgs *args)
     macio_init(pci_bus, PCI_DEVICE_ID_APPLE_UNI_N_KEYL, 0, pic_mem,
                dbdma_mem, cuda_mem, NULL, 3, ide_mem, escc_bar);
 
-    if (usb_enabled) {
+    if (usb_enabled(machine_arch == ARCH_MAC99_U3)) {
         pci_create_simple(pci_bus, -1, "pci-ohci");
-    }
-
-    /* U3 needs to use USB for input because Linux doesn't support via-cuda
-       on PPC64 */
-    if (machine_arch == ARCH_MAC99_U3) {
-        usbdevice_create("keyboard");
-        usbdevice_create("mouse");
+        /* U3 needs to use USB for input because Linux doesn't support via-cuda
+        on PPC64 */
+        if (machine_arch == ARCH_MAC99_U3) {
+            usbdevice_create("keyboard");
+            usbdevice_create("mouse");
+        }
     }
 
     if (graphic_depth != 15 && graphic_depth != 32 && graphic_depth != 8)
diff --git a/hw/ppc_oldworld.c b/hw/ppc_oldworld.c
index a4f899d..e8138c0 100644
--- a/hw/ppc_oldworld.c
+++ b/hw/ppc_oldworld.c
@@ -286,7 +286,7 @@ static void ppc_heathrow_init(QEMUMachineInitArgs *args)
     macio_init(pci_bus, PCI_DEVICE_ID_APPLE_343S1201, 1, pic_mem,
                dbdma_mem, cuda_mem, nvr, 2, ide_mem, escc_bar);
 
-    if (usb_enabled) {
+    if (usb_enabled(false)) {
         pci_create_simple(pci_bus, -1, "pci-ohci");
     }
 
diff --git a/hw/ppc_prep.c b/hw/ppc_prep.c
index 085851a..bf15730 100644
--- a/hw/ppc_prep.c
+++ b/hw/ppc_prep.c
@@ -661,7 +661,7 @@ static void ppc_prep_init(QEMUMachineInitArgs *args)
     memory_region_add_subregion(sysmem, 0xFEFF0000, xcsr);
 #endif
 
-    if (usb_enabled) {
+    if (usb_enabled(false)) {
         pci_create_simple(pci_bus, -1, "pci-ohci");
     }
 
diff --git a/hw/pxa2xx.c b/hw/pxa2xx.c
index 0fb2179..e616979 100644
--- a/hw/pxa2xx.c
+++ b/hw/pxa2xx.c
@@ -2108,7 +2108,7 @@ PXA2xxState *pxa270_init(MemoryRegion *address_space,
         s->ssp[i] = (SSIBus *)qdev_get_child_bus(dev, "ssi");
     }
 
-    if (usb_enabled) {
+    if (usb_enabled(false)) {
         sysbus_create_simple("sysbus-ohci", 0x4c000000,
                         qdev_get_gpio_in(s->pic, PXA2XX_PIC_USBH1));
     }
@@ -2239,7 +2239,7 @@ PXA2xxState *pxa255_init(MemoryRegion *address_space, unsigned int sdram_size)
         s->ssp[i] = (SSIBus *)qdev_get_child_bus(dev, "ssi");
     }
 
-    if (usb_enabled) {
+    if (usb_enabled(false)) {
         sysbus_create_simple("sysbus-ohci", 0x4c000000,
                         qdev_get_gpio_in(s->pic, PXA2XX_PIC_USBH1));
     }
diff --git a/hw/realview.c b/hw/realview.c
index baa92d4..b5cb08c 100644
--- a/hw/realview.c
+++ b/hw/realview.c
@@ -227,7 +227,7 @@ static void realview_init(ram_addr_t ram_size,
         sysbus_connect_irq(busdev, 2, pic[50]);
         sysbus_connect_irq(busdev, 3, pic[51]);
         pci_bus = (PCIBus *)qdev_get_child_bus(dev, "pci");
-        if (usb_enabled) {
+        if (usb_enabled(false)) {
             pci_create_simple(pci_bus, -1, "pci-ohci");
         }
         n = drive_get_max_bus(IF_SCSI);
diff --git a/hw/spapr.c b/hw/spapr.c
index 73d75e8..1587bc3 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -840,7 +840,7 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
         spapr->has_graphics = true;
     }
 
-    if (usb_enabled) {
+    if (usb_enabled(spapr->has_graphics)) {
         pci_create_simple(phb->bus, -1, "pci-ohci");
         if (spapr->has_graphics) {
             usbdevice_create("keyboard");
diff --git a/hw/versatilepb.c b/hw/versatilepb.c
index f55bd0c..e85f982 100644
--- a/hw/versatilepb.c
+++ b/hw/versatilepb.c
@@ -248,7 +248,7 @@ static void versatile_init(ram_addr_t ram_size,
             pci_nic_init_nofail(nd, "rtl8139", NULL);
         }
     }
-    if (usb_enabled) {
+    if (usb_enabled(false)) {
         pci_create_simple(pci_bus, -1, "pci-ohci");
     }
     n = drive_get_max_bus(IF_SCSI);
diff --git a/qemu-config.c b/qemu-config.c
index cd1ec21..97ffb97 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -619,6 +619,10 @@ static QemuOptsList qemu_machine_opts = {
             .name = "mem-merge",
             .type = QEMU_OPT_BOOL,
             .help = "enable/disable memory merge support",
+        },{
+            .name = "usb",
+            .type = QEMU_OPT_BOOL,
+            .help = "Set on/off to enable/disable usb",
         },
         { /* End of list */ }
     },
diff --git a/sysemu.h b/sysemu.h
index 0c39a3a..f72b4ce 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -116,7 +116,6 @@ extern const char *keyboard_layout;
 extern int win2k_install_hack;
 extern int alt_grab;
 extern int ctrl_grab;
-extern int usb_enabled;
 extern int smp_cpus;
 extern int max_cpus;
 extern int cursor_hide;
@@ -186,4 +185,7 @@ void register_devices(void);
 void add_boot_device_path(int32_t bootindex, DeviceState *dev,
                           const char *suffix);
 char *get_boot_devices_list(uint32_t *size);
+
+bool usb_enabled(bool default_usb);
+
 #endif
diff --git a/vl.c b/vl.c
index 9f99ef4..eb533b2 100644
--- a/vl.c
+++ b/vl.c
@@ -203,7 +203,6 @@ CharDriverState *serial_hds[MAX_SERIAL_PORTS];
 CharDriverState *parallel_hds[MAX_PARALLEL_PORTS];
 CharDriverState *virtcon_hds[MAX_VIRTIO_CONSOLES];
 int win2k_install_hack = 0;
-int usb_enabled = 0;
 int singlestep = 0;
 int smp_cpus = 1;
 int max_cpus = 0;
@@ -790,6 +789,17 @@ static int parse_sandbox(QemuOpts *opts, void *opaque)
     return 0;
 }
 
+/*********QEMU USB setting******/
+bool usb_enabled(bool default_usb)
+{
+    QemuOpts *mach_opts;
+    mach_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
+    if (mach_opts) {
+        return qemu_opt_get_bool(mach_opts, "usb", default_usb);
+    }
+    return default_usb;
+}
+
 /***********************************************************/
 /* QEMU Block devices */
 
@@ -1077,8 +1087,9 @@ static int usb_device_add(const char *devname)
     const char *p;
     USBDevice *dev = NULL;
 
-    if (!usb_enabled)
+    if (!usb_enabled(false)) {
         return -1;
+    }
 
     /* drivers with .usbdevice_name entry in USBDeviceInfo */
     dev = usbdevice_create(devname);
@@ -1114,8 +1125,9 @@ static int usb_device_del(const char *devname)
     if (strstart(devname, "host:", &p))
         return usb_host_device_close(p);
 
-    if (!usb_enabled)
+    if (!usb_enabled(false)) {
         return -1;
+    }
 
     p = strchr(devname, '.');
     if (!p)
@@ -3083,10 +3095,16 @@ int main(int argc, char **argv, char **envp)
                 }
                 break;
             case QEMU_OPTION_usb:
-                usb_enabled = 1;
+                machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
+                if (machine_opts) {
+                    qemu_opt_set_bool(machine_opts, "usb", true);
+                }
                 break;
             case QEMU_OPTION_usbdevice:
-                usb_enabled = 1;
+                machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
+                if (machine_opts) {
+                    qemu_opt_set_bool(machine_opts, "usb", true);
+                }
                 add_device_config(DEV_USB, optarg);
                 break;
             case QEMU_OPTION_device:
@@ -3653,7 +3671,7 @@ int main(int argc, char **argv, char **envp)
     current_machine = machine;
 
     /* init USB devices */
-    if (usb_enabled) {
+    if (usb_enabled(false)) {
         if (foreach_device_config(DEV_USB, usb_parse) < 0)
             exit(1);
     }
commit 59de4f98d14fb84f04181a6f816ef6dd8b82dd79
Author: Bharat Bhushan <r65777 at freescale.com>
Date:   Mon Oct 8 06:46:54 2012 +0000

    e500: Fix serial initialization
    
    it was wrongly using serial_hds[0] instead of serial_hds[1]
    
    Signed-off-by: Bharat Bhushan <bharat.bhushan at freescale.com>
    Reviewed-by: Andreas FÃ¤rber <afaerber at suse.de>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index d655e3f..90d88eb 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -496,7 +496,7 @@ void ppce500_init(PPCE500Params *params)
     if (serial_hds[1]) {
         serial_mm_init(address_space_mem, MPC8544_SERIAL1_REGS_BASE,
                        0, mpic[12+26], 399193,
-                       serial_hds[0], DEVICE_BIG_ENDIAN);
+                       serial_hds[1], DEVICE_BIG_ENDIAN);
     }
 
     /* General Utility device */
commit e598a9c56badd00eaa732c4e0e69def4e5e6614b
Author: Alexander Graf <agraf at suse.de>
Date:   Sat Oct 6 22:54:25 2012 +0200

    PPC: 440: Emulate DCBR0
    
    The DCBR0 register on 440 is used to implement system reset. The same
    register is used on 405 as well, so just reuse the code.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index a972287..ad54985 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -1498,7 +1498,7 @@ static void gen_spr_BookE (CPUPPCState *env, uint64_t ivor_mask)
     /* XXX : not implemented */
     spr_register(env, SPR_BOOKE_DBCR0, "DBCR0",
                  SPR_NOACCESS, SPR_NOACCESS,
-                 &spr_read_generic, &spr_write_generic,
+                 &spr_read_generic, &spr_write_40x_dbcr0,
                  0x00000000);
     /* XXX : not implemented */
     spr_register(env, SPR_BOOKE_DBCR1, "DBCR1",
commit 5232fa59b17b45c04bd24e0d38224964816bf391
Author: Alexander Graf <agraf at suse.de>
Date:   Sat Oct 6 02:02:05 2012 +0200

    PPC: Bamboo: Fix memory size DT property
    
    Device tree properties need to be specified in big endian. Fix the
    bamboo memory size property accordingly.
    
    Signed-off-by: Alexander Graf <agraf at suse.de>
    CC: qemu-stable at nongnu.org

diff --git a/hw/ppc440_bamboo.c b/hw/ppc440_bamboo.c
index a6b1d51..cc85607 100644
--- a/hw/ppc440_bamboo.c
+++ b/hw/ppc440_bamboo.c
@@ -59,7 +59,7 @@ static int bamboo_load_device_tree(hwaddr addr,
 {
     int ret = -1;
 #ifdef CONFIG_FDT
-    uint32_t mem_reg_property[] = { 0, 0, ramsize };
+    uint32_t mem_reg_property[] = { 0, 0, cpu_to_be32(ramsize) };
     char *filename;
     int fdt_size;
     void *fdt;
commit 21b2f13ae21974e0fd7f8da99d84628a8000d1d7
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Fri Oct 5 07:09:02 2012 +0000

    Drop unnecessary check of TARGET_PHYS_ADDR_SPACE_BITS
    
    For all our PPC targets the physical address space is at least
    36 bits, so drop an unnecessary preprocessor conditional check
    on TARGET_PHYS_ADDR_SPACE_BITS (erroneously introduced as part
    of the change from target_phys_addr_t to hwaddr). This brings
    this bit of code into line with the way we handle the other
    cases which were originally checking TARGET_PHYS_ADDR_BITS in
    order to avoid compiler complaints about overflowing a 32 bit type.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Alexander Graf <agraf at suse.de>

diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index 4a9bb5b..811f47f 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -1509,10 +1509,8 @@ static void mmubooke_dump_mmu(FILE *f, fprintf_function cpu_fprintf,
         mask = ~(entry->size - 1);
         ea = entry->EPN & mask;
         pa = entry->RPN & mask;
-#if (TARGET_PHYS_ADDR_SPACE_BITS >= 36)
         /* Extend the physical address to 36 bits */
         pa |= (hwaddr)(entry->RPN & 0xF) << 32;
-#endif
         size /= 1024;
         if (size >= 1024) {
             snprintf(size_buf, sizeof(size_buf), "%3" PRId64 "M", size / 1024);
commit 01f590d5a3385c3d79c6afda7acd66edf4bf48d3
Author: Eduardo Habkost <ehabkost at redhat.com>
Date:   Wed Oct 24 12:10:33 2012 -0200

    target-i386: cpu: recover items 28-31 of ext2_feature_name
    
    I removed a line by mistake on commit
    3b671a40cab2404bc63e57db8cd3afa4ec70bfab, containing the flags lm/i64,
    3dnow, and 3dnowext. This patch restores the removed line.
    
    Reviewed-by: Don Slutz <Don at cloudswitch.com>
    Signed-off-by: Eduardo Habkost <ehabkost at redhat.com>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index f3708e6..6411042 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -74,6 +74,7 @@ static const char *ext2_feature_name[] = {
     NULL /* pat */, NULL /* pse36 */, NULL, NULL /* Linux mp */,
     "nx|xd", NULL, "mmxext", NULL /* mmx */,
     NULL /* fxsr */, "fxsr_opt|ffxsr", "pdpe1gb" /* AMD Page1GB */, "rdtscp",
+    NULL, "lm|i64", "3dnowext", "3dnow",
 };
 static const char *ext3_feature_name[] = {
     "lahf_lm" /* AMD LahfSahf */, "cmp_legacy", "svm", "extapic" /* AMD ExtApicSpace */,
commit 1e9737da4aafb54681203931dfe8f8eea21fcef7
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Oct 23 07:33:00 2012 +1000

    qemu-timer: Check for usable fields for SIGEV_THREAD_ID
    
    Older glibc (RHEL 5.x, Debian 5.x) does not have the _sigev_un._tid
    member in its structure definition, while the accompanying kernel
    headers do define SIGEV_THREAD_ID.  We need configure to check for
    both before using it.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/configure b/configure
index 37f712d..3685020 100755
--- a/configure
+++ b/configure
@@ -2813,6 +2813,24 @@ if compile_prog "" "" ; then
 fi
 
 ##########################################
+# check if we have usable SIGEV_THREAD_ID
+
+sigev_thread_id=no
+cat > $TMPC << EOF
+#include <signal.h>
+int main(void) {
+  struct sigevent ev;
+  ev.sigev_notify = SIGEV_THREAD_ID;
+  ev._sigev_un._tid = 0;
+  asm volatile("" : : "g"(&ev));
+  return 0;
+}
+EOF
+if compile_prog "" "" ; then
+    sigev_thread_id=yes
+fi
+
+##########################################
 # check if trace backend exists
 
 $python "$source_path/scripts/tracetool.py" "--backend=$trace_backend" --check-backend  > /dev/null 2> /dev/null
@@ -3159,6 +3177,7 @@ echo "preadv support    $preadv"
 echo "fdatasync         $fdatasync"
 echo "madvise           $madvise"
 echo "posix_madvise     $posix_madvise"
+echo "sigev_thread_id   $sigev_thread_id"
 echo "uuid support      $uuid"
 echo "libcap-ng support $cap_ng"
 echo "vhost-net support $vhost_net"
@@ -3443,6 +3462,9 @@ fi
 if test "$posix_madvise" = "yes" ; then
   echo "CONFIG_POSIX_MADVISE=y" >> $config_host_mak
 fi
+if test "$sigev_thread_id" = "yes" ; then
+  echo "CONFIG_SIGEV_THREAD_ID=y" >> $config_host_mak
+fi
 
 if test "$spice" = "yes" ; then
   echo "CONFIG_SPICE=y" >> $config_host_mak
diff --git a/qemu-timer.c b/qemu-timer.c
index 908a103..ede84ff 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -494,12 +494,12 @@ static int dynticks_start_timer(struct qemu_alarm_timer *t)
     memset(&ev, 0, sizeof(ev));
     ev.sigev_value.sival_int = 0;
     ev.sigev_notify = SIGEV_SIGNAL;
-#ifdef SIGEV_THREAD_ID
+#ifdef CONFIG_SIGEV_THREAD_ID
     if (qemu_signalfd_available()) {
         ev.sigev_notify = SIGEV_THREAD_ID;
         ev._sigev_un._tid = qemu_get_thread_id();
     }
-#endif /* SIGEV_THREAD_ID */
+#endif /* CONFIG_SIGEV_THREAD_ID */
     ev.sigev_signo = SIGALRM;
 
     if (timer_create(CLOCK_REALTIME, &ev, &host_timer)) {
commit 44b37ace0636cd229e956c5000a1bb6a49e274e7
Author: Richard Henderson <rth at twiddle.net>
Date:   Mon Oct 22 12:11:07 2012 +1000

    tcg-i386: Use %gs prefixes for x86_64 GUEST_BASE
    
    When we allocate a reserved_va for the guest, the kernel will likely
    choose an address well above 4G.  At which point we must use a pair
    of movabsq+addq to form the host address.  If we have OS support,
    set up a segment register to point to guest_base instead.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 9c8f69a..e45a5a0 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -232,11 +232,13 @@ static inline int tcg_target_const_match(tcg_target_long val,
 # define P_REXW		0x800		/* Set REX.W = 1 */
 # define P_REXB_R	0x1000		/* REG field as byte register */
 # define P_REXB_RM	0x2000		/* R/M field as byte register */
+# define P_GS           0x4000          /* gs segment override */
 #else
 # define P_ADDR32	0
 # define P_REXW		0
 # define P_REXB_R	0
 # define P_REXB_RM	0
+# define P_GS           0
 #endif
 
 #define OPC_ARITH_EvIz	(0x81)
@@ -352,6 +354,9 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
 {
     int rex;
 
+    if (opc & P_GS) {
+        tcg_out8(s, 0x65);
+    }
     if (opc & P_DATA16) {
         /* We should never be asking for both 16 and 64-bit operation.  */
         assert((opc & P_REXW) == 0);
@@ -1076,10 +1081,27 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
     tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r1, r0,
                          offsetof(CPUTLBEntry, addend) - which);
 }
-#endif
+#elif defined(__x86_64__) && defined(__linux__)
+# include <asm/prctl.h>
+# include <sys/prctl.h>
+
+int arch_prctl(int code, unsigned long addr);
+
+static int guest_base_flags;
+static inline void setup_guest_base_seg(void)
+{
+    if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
+        guest_base_flags = P_GS;
+    }
+}
+#else
+# define guest_base_flags 0
+static inline void setup_guest_base_seg(void) { }
+#endif /* SOFTMMU */
 
 static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
-                                   int base, tcg_target_long ofs, int sizeop)
+                                   int base, tcg_target_long ofs, int seg,
+                                   int sizeop)
 {
 #ifdef TARGET_WORDS_BIGENDIAN
     const int bswap = 1;
@@ -1088,28 +1110,29 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
 #endif
     switch (sizeop) {
     case 0:
-        tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
+        tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
         break;
     case 0 | 4:
-        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
+        tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
         break;
     case 1:
-        tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
+        tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
         if (bswap) {
             tcg_out_rolw_8(s, datalo);
         }
         break;
     case 1 | 4:
         if (bswap) {
-            tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
+            tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
             tcg_out_rolw_8(s, datalo);
             tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
         } else {
-            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
+            tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
+                                 datalo, base, ofs);
         }
         break;
     case 2:
-        tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+        tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
         if (bswap) {
             tcg_out_bswap32(s, datalo);
         }
@@ -1117,17 +1140,18 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
 #if TCG_TARGET_REG_BITS == 64
     case 2 | 4:
         if (bswap) {
-            tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
             tcg_out_bswap32(s, datalo);
             tcg_out_ext32s(s, datalo, datalo);
         } else {
-            tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
+            tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
         }
         break;
 #endif
     case 3:
         if (TCG_TARGET_REG_BITS == 64) {
-            tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
+            tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
+                                 datalo, base, ofs);
             if (bswap) {
                 tcg_out_bswap64(s, datalo);
             }
@@ -1138,11 +1162,15 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
                 datahi = t;
             }
             if (base != datalo) {
-                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
-                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+                                     datalo, base, ofs);
+                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+                                     datahi, base, ofs + 4);
             } else {
-                tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
-                tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+                                     datahi, base, ofs + 4);
+                tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+                                     datalo, base, ofs);
             }
             if (bswap) {
                 tcg_out_bswap32(s, datalo);
@@ -1186,7 +1214,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                      label_ptr, offsetof(CPUTLBEntry, addr_read));
 
     /* TLB Hit.  */
-    tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, opc);
+    tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, 0, opc);
 
     /* jmp label2 */
     tcg_out8(s, OPC_JMP_short);
@@ -1273,29 +1301,31 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     {
         int32_t offset = GUEST_BASE;
         int base = args[addrlo_idx];
-
-        if (TCG_TARGET_REG_BITS == 64) {
-            /* ??? We assume all operations have left us with register
-               contents that are zero extended.  So far this appears to
-               be true.  If we want to enforce this, we can either do
-               an explicit zero-extension here, or (if GUEST_BASE == 0)
-               use the ADDR32 prefix.  For now, do nothing.  */
-
-            if (offset != GUEST_BASE) {
-                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
-                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
-                base = TCG_REG_L1;
-                offset = 0;
-            }
+        int seg = 0;
+
+        /* ??? We assume all operations have left us with register contents
+           that are zero extended.  So far this appears to be true.  If we
+           want to enforce this, we can either do an explicit zero-extension
+           here, or (if GUEST_BASE == 0, or a segment register is in use)
+           use the ADDR32 prefix.  For now, do nothing.  */
+        if (GUEST_BASE && guest_base_flags) {
+            seg = guest_base_flags;
+            offset = 0;
+        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
+            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
+            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
+            base = TCG_REG_L1;
+            offset = 0;
         }
 
-        tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
+        tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, seg, opc);
     }
 #endif
 }
 
 static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
-                                   int base, tcg_target_long ofs, int sizeop)
+                                   int base, tcg_target_long ofs, int seg,
+                                   int sizeop)
 {
 #ifdef TARGET_WORDS_BIGENDIAN
     const int bswap = 1;
@@ -1310,7 +1340,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
 
     switch (sizeop) {
     case 0:
-        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
+        tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
+                             datalo, base, ofs);
         break;
     case 1:
         if (bswap) {
@@ -1318,7 +1349,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
             tcg_out_rolw_8(s, scratch);
             datalo = scratch;
         }
-        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
+        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
+                             datalo, base, ofs);
         break;
     case 2:
         if (bswap) {
@@ -1326,7 +1358,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
             tcg_out_bswap32(s, scratch);
             datalo = scratch;
         }
-        tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
+        tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
         break;
     case 3:
         if (TCG_TARGET_REG_BITS == 64) {
@@ -1335,17 +1367,18 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
                 tcg_out_bswap64(s, scratch);
                 datalo = scratch;
             }
-            tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
+            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
+                                 datalo, base, ofs);
         } else if (bswap) {
             tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
             tcg_out_bswap32(s, scratch);
-            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
+            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
             tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
             tcg_out_bswap32(s, scratch);
-            tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
+            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
         } else {
-            tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
-            tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
+            tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);
         }
         break;
     default:
@@ -1379,7 +1412,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                      label_ptr, offsetof(CPUTLBEntry, addr_write));
 
     /* TLB Hit.  */
-    tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, opc);
+    tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, 0, opc);
 
     /* jmp label2 */
     tcg_out8(s, OPC_JMP_short);
@@ -1436,23 +1469,24 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     {
         int32_t offset = GUEST_BASE;
         int base = args[addrlo_idx];
-
-        if (TCG_TARGET_REG_BITS == 64) {
-            /* ??? We assume all operations have left us with register
-               contents that are zero extended.  So far this appears to
-               be true.  If we want to enforce this, we can either do
-               an explicit zero-extension here, or (if GUEST_BASE == 0)
-               use the ADDR32 prefix.  For now, do nothing.  */
-
-            if (offset != GUEST_BASE) {
-                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
-                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
-                base = TCG_REG_L1;
-                offset = 0;
-            }
+        int seg = 0;
+
+        /* ??? We assume all operations have left us with register contents
+           that are zero extended.  So far this appears to be true.  If we
+           want to enforce this, we can either do an explicit zero-extension
+           here, or (if GUEST_BASE == 0, or a segment register is in use)
+           use the ADDR32 prefix.  For now, do nothing.  */
+        if (GUEST_BASE && guest_base_flags) {
+            seg = guest_base_flags;
+            offset = 0;
+        } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
+            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
+            tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
+            base = TCG_REG_L1;
+            offset = 0;
         }
 
-        tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
+        tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, seg, opc);
     }
 #endif
 }
@@ -2046,6 +2080,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
         tcg_out_pop(s, tcg_target_callee_save_regs[i]);
     }
     tcg_out_opc(s, OPC_RET, 0, 0, 0);
+
+#if !defined(CONFIG_SOFTMMU)
+    /* Try to set up a segment register to point to GUEST_BASE.  */
+    if (GUEST_BASE) {
+        setup_guest_base_seg();
+    }
+#endif
 }
 
 static void tcg_target_init(TCGContext *s)
commit d73ee8a2b5267adae41ecb464cae06a185d2b0f9
Author: Richard Henderson <rth at twiddle.net>
Date:   Tue Oct 9 21:53:19 2012 +0200

    target-mips: Use TCG registers for the FPU.
    
    With normal FP, this doesn't have much affect on the generated code,
    because most of the FP operations are not CONST/PURE, and so we spill
    registers in about the same frequency as the explicit load/stores.
    
    But with Loongson multimedia instructions, which are all integral and
    whose helpers are in fact CONST+PURE, this greatly improves the code.
    
    Signed-off-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-mips/translate.c b/target-mips/translate.c
index 454e5cc..ed55e26 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -575,6 +575,7 @@ static TCGv cpu_HI[MIPS_DSP_ACC], cpu_LO[MIPS_DSP_ACC], cpu_ACX[MIPS_DSP_ACC];
 static TCGv cpu_dspctrl, btarget, bcond;
 static TCGv_i32 hflags;
 static TCGv_i32 fpu_fcr0, fpu_fcr31;
+static TCGv_i64 fpu_f64[32];
 
 static uint32_t gen_opc_hflags[OPC_BUF_SIZE];
 
@@ -642,26 +643,31 @@ enum {
     BS_EXCP     = 3, /* We reached an exception condition */
 };
 
-static const char *regnames[] =
-    { "r0", "at", "v0", "v1", "a0", "a1", "a2", "a3",
-      "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
-      "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
-      "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra", };
+static const char * const regnames[] = {
+    "r0", "at", "v0", "v1", "a0", "a1", "a2", "a3",
+    "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
+    "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7",
+    "t8", "t9", "k0", "k1", "gp", "sp", "s8", "ra",
+};
 
-static const char *regnames_HI[] =
-    { "HI0", "HI1", "HI2", "HI3", };
+static const char * const regnames_HI[] = {
+    "HI0", "HI1", "HI2", "HI3",
+};
 
-static const char *regnames_LO[] =
-    { "LO0", "LO1", "LO2", "LO3", };
+static const char * const regnames_LO[] = {
+    "LO0", "LO1", "LO2", "LO3",
+};
 
-static const char *regnames_ACX[] =
-    { "ACX0", "ACX1", "ACX2", "ACX3", };
+static const char * const regnames_ACX[] = {
+    "ACX0", "ACX1", "ACX2", "ACX3",
+};
 
-static const char *fregnames[] =
-    { "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",
-      "f8",  "f9",  "f10", "f11", "f12", "f13", "f14", "f15",
-      "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
-      "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", };
+static const char * const fregnames[] = {
+    "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",
+    "f8",  "f9",  "f10", "f11", "f12", "f13", "f14", "f15",
+    "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
+    "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",
+};
 
 #define MIPS_DEBUG(fmt, ...)                                                  \
     do {                                                                      \
@@ -758,54 +764,54 @@ static inline void gen_store_srsgpr (int from, int to)
 }
 
 /* Floating point register moves. */
-static inline void gen_load_fpr32 (TCGv_i32 t, int reg)
+static void gen_load_fpr32(TCGv_i32 t, int reg)
 {
-    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
+    tcg_gen_trunc_i64_i32(t, fpu_f64[reg]);
 }
 
-static inline void gen_store_fpr32 (TCGv_i32 t, int reg)
+static void gen_store_fpr32(TCGv_i32 t, int reg)
 {
-    tcg_gen_st_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[FP_ENDIAN_IDX]));
+    TCGv_i64 t64 = tcg_temp_new_i64();
+    tcg_gen_extu_i32_i64(t64, t);
+    tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 0, 32);
+    tcg_temp_free_i64(t64);
 }
 
-static inline void gen_load_fpr32h (TCGv_i32 t, int reg)
+static void gen_load_fpr32h(TCGv_i32 t, int reg)
 {
-    tcg_gen_ld_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
+    TCGv_i64 t64 = tcg_temp_new_i64();
+    tcg_gen_shri_i64(t64, fpu_f64[reg], 32);
+    tcg_gen_trunc_i64_i32(t, t64);
+    tcg_temp_free_i64(t64);
 }
 
-static inline void gen_store_fpr32h (TCGv_i32 t, int reg)
+static void gen_store_fpr32h(TCGv_i32 t, int reg)
 {
-    tcg_gen_st_i32(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].w[!FP_ENDIAN_IDX]));
+    TCGv_i64 t64 = tcg_temp_new_i64();
+    tcg_gen_extu_i32_i64(t64, t);
+    tcg_gen_deposit_i64(fpu_f64[reg], fpu_f64[reg], t64, 32, 32);
+    tcg_temp_free_i64(t64);
 }
 
-static inline void gen_load_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
+static void gen_load_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 {
     if (ctx->hflags & MIPS_HFLAG_F64) {
-        tcg_gen_ld_i64(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].d));
+        tcg_gen_mov_i64(t, fpu_f64[reg]);
     } else {
-        TCGv_i32 t0 = tcg_temp_new_i32();
-        TCGv_i32 t1 = tcg_temp_new_i32();
-        gen_load_fpr32(t0, reg & ~1);
-        gen_load_fpr32(t1, reg | 1);
-        tcg_gen_concat_i32_i64(t, t0, t1);
-        tcg_temp_free_i32(t0);
-        tcg_temp_free_i32(t1);
+        tcg_gen_concat32_i64(t, fpu_f64[reg & ~1], fpu_f64[reg | 1]);
     }
 }
 
-static inline void gen_store_fpr64 (DisasContext *ctx, TCGv_i64 t, int reg)
+static void gen_store_fpr64(DisasContext *ctx, TCGv_i64 t, int reg)
 {
     if (ctx->hflags & MIPS_HFLAG_F64) {
-        tcg_gen_st_i64(t, cpu_env, offsetof(CPUMIPSState, active_fpu.fpr[reg].d));
+        tcg_gen_mov_i64(fpu_f64[reg], t);
     } else {
-        TCGv_i64 t0 = tcg_temp_new_i64();
-        TCGv_i32 t1 = tcg_temp_new_i32();
-        tcg_gen_trunc_i64_i32(t1, t);
-        gen_store_fpr32(t1, reg & ~1);
+        TCGv_i64 t0;
+        tcg_gen_deposit_i64(fpu_f64[reg & ~1], fpu_f64[reg & ~1], t, 0, 32);
+        t0 = tcg_temp_new_i64();
         tcg_gen_shri_i64(t0, t, 32);
-        tcg_gen_trunc_i64_i32(t1, t0);
-        gen_store_fpr32(t1, reg | 1);
-        tcg_temp_free_i32(t1);
+        tcg_gen_deposit_i64(fpu_f64[reg | 1], fpu_f64[reg | 1], t0, 0, 32);
         tcg_temp_free_i64(t0);
     }
 }
@@ -13073,6 +13079,12 @@ static void mips_tcg_init(void)
         cpu_gpr[i] = tcg_global_mem_new(TCG_AREG0,
                                         offsetof(CPUMIPSState, active_tc.gpr[i]),
                                         regnames[i]);
+
+    for (i = 0; i < 32; i++) {
+        int off = offsetof(CPUMIPSState, active_fpu.fpr[i]);
+        fpu_f64[i] = tcg_global_mem_new_i64(TCG_AREG0, off, fregnames[i]);
+    }
+
     cpu_PC = tcg_global_mem_new(TCG_AREG0,
                                 offsetof(CPUMIPSState, active_tc.PC), "PC");
     for (i = 0; i < MIPS_DSP_ACC; i++) {
commit b393ab4228adc9338c17340d825f7a60e5f6820d
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:09 2012 +0200

    tcg: remove compatiblity call flags
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/tcg.h b/tcg/tcg.h
index 96b6222..a6c9256 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -268,10 +268,6 @@ typedef int TCGv_i64;
 #define TCG_CALL_NO_RWG_SE      (TCG_CALL_NO_RWG | TCG_CALL_NO_SE)
 #define TCG_CALL_NO_WG_SE       (TCG_CALL_NO_WG | TCG_CALL_NO_SE)
 
-/* compatibility call flags, they should be eventually be removed */
-#define TCG_CALL_PURE           TCG_CALL_NO_SIDE_EFFECTS
-#define TCG_CALL_CONST          TCG_CALL_NO_READ_GLOBALS
-
 /* used to align parameters */
 #define TCG_CALL_DUMMY_TCGV     MAKE_TCGV_I32(-1)
 #define TCG_CALL_DUMMY_ARG      ((TCGArg)(-1))
commit f0de41325a195503d352d8e154c1015290f4c87c
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:09 2012 +0200

    target-xtensa: rename helper flags
    
    Rename helper flags to the new ones. This is purely a mechanical change,
    it's possible to use better flags by looking at the helpers.
    
    Cc: Max Filippov <jcmvbkbc at gmail.com>
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-xtensa/helper.h b/target-xtensa/helper.h
index 4cc0088..1163c09 100644
--- a/target-xtensa/helper.h
+++ b/target-xtensa/helper.h
@@ -5,8 +5,8 @@ DEF_HELPER_3(exception_cause, noreturn, env, i32, i32)
 DEF_HELPER_4(exception_cause_vaddr, noreturn, env, i32, i32, i32)
 DEF_HELPER_3(debug_exception, noreturn, env, i32, i32)
 
-DEF_HELPER_FLAGS_1(nsa, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
-DEF_HELPER_FLAGS_1(nsau, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
+DEF_HELPER_FLAGS_1(nsa, TCG_CALL_NO_RWG_SE, i32, i32)
+DEF_HELPER_FLAGS_1(nsau, TCG_CALL_NO_RWG_SE, i32, i32)
 DEF_HELPER_2(wsr_windowbase, void, env, i32)
 DEF_HELPER_4(entry, void, env, i32, i32, i32)
 DEF_HELPER_2(retw, i32, env, i32)
@@ -25,8 +25,8 @@ DEF_HELPER_2(advance_ccount, void, env, i32)
 DEF_HELPER_1(check_interrupts, void, env)
 
 DEF_HELPER_2(wsr_rasid, void, env, i32)
-DEF_HELPER_FLAGS_3(rtlb0, TCG_CALL_CONST | TCG_CALL_PURE, i32, env, i32, i32)
-DEF_HELPER_FLAGS_3(rtlb1, TCG_CALL_CONST | TCG_CALL_PURE, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(rtlb0, TCG_CALL_NO_RWG_SE, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(rtlb1, TCG_CALL_NO_RWG_SE, i32, env, i32, i32)
 DEF_HELPER_3(itlb, void, env, i32, i32)
 DEF_HELPER_3(ptlb, i32, env, i32, i32)
 DEF_HELPER_4(wtlb, void, env, i32, i32, i32)
@@ -37,15 +37,15 @@ DEF_HELPER_3(wsr_dbreaka, void, env, i32, i32)
 DEF_HELPER_3(wsr_dbreakc, void, env, i32, i32)
 
 DEF_HELPER_2(wur_fcr, void, env, i32)
-DEF_HELPER_FLAGS_1(abs_s, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32)
-DEF_HELPER_FLAGS_1(neg_s, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32)
+DEF_HELPER_FLAGS_1(abs_s, TCG_CALL_NO_RWG_SE, f32, f32)
+DEF_HELPER_FLAGS_1(neg_s, TCG_CALL_NO_RWG_SE, f32, f32)
 DEF_HELPER_3(add_s, f32, env, f32, f32)
 DEF_HELPER_3(sub_s, f32, env, f32, f32)
 DEF_HELPER_3(mul_s, f32, env, f32, f32)
 DEF_HELPER_4(madd_s, f32, env, f32, f32, f32)
 DEF_HELPER_4(msub_s, f32, env, f32, f32, f32)
-DEF_HELPER_FLAGS_3(ftoi, TCG_CALL_CONST | TCG_CALL_PURE, i32, f32, i32, i32)
-DEF_HELPER_FLAGS_3(ftoui, TCG_CALL_CONST | TCG_CALL_PURE, i32, f32, i32, i32)
+DEF_HELPER_FLAGS_3(ftoi, TCG_CALL_NO_RWG_SE, i32, f32, i32, i32)
+DEF_HELPER_FLAGS_3(ftoui, TCG_CALL_NO_RWG_SE, i32, f32, i32, i32)
 DEF_HELPER_3(itof, f32, env, i32, i32)
 DEF_HELPER_3(uitof, f32, env, i32, i32)
 
commit c33b235358a4c385abe36b3082ee9437bc86c2f5
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:09 2012 +0200

    target-sparc: rename helper flags
    
    Rename helper flags to the new ones. This is purely a mechanical change,
    it's possible to use better flags by looking at the helpers.
    
    Acked-by: Blue Swirl <blauwirbel at gmail.com>
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-sparc/helper.h b/target-sparc/helper.h
index e1ae3c7..098c482 100644
--- a/target-sparc/helper.h
+++ b/target-sparc/helper.h
@@ -16,7 +16,7 @@ DEF_HELPER_1(rdccr, tl, env)
 DEF_HELPER_2(wrccr, void, env, tl)
 DEF_HELPER_1(rdcwp, tl, env)
 DEF_HELPER_2(wrcwp, void, env, tl)
-DEF_HELPER_FLAGS_2(array8, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl, tl)
+DEF_HELPER_FLAGS_2(array8, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 DEF_HELPER_1(popc, tl, tl)
 DEF_HELPER_4(ldda_asi, void, env, tl, int, int)
 DEF_HELPER_5(ldf_asi, void, env, tl, int, int, int)
@@ -51,7 +51,7 @@ DEF_HELPER_5(ld_asi, i64, env, tl, int, int, int)
 DEF_HELPER_5(st_asi, void, env, tl, i64, int, int)
 #endif
 DEF_HELPER_2(ldfsr, void, env, i32)
-DEF_HELPER_FLAGS_1(fabss, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32)
+DEF_HELPER_FLAGS_1(fabss, TCG_CALL_NO_RWG_SE, f32, f32)
 DEF_HELPER_2(fsqrts, f32, env, f32)
 DEF_HELPER_2(fsqrtd, f64, env, f64)
 DEF_HELPER_3(fcmps, void, env, f32, f32)
@@ -63,7 +63,7 @@ DEF_HELPER_1(fcmpq, void, env)
 DEF_HELPER_1(fcmpeq, void, env)
 #ifdef TARGET_SPARC64
 DEF_HELPER_2(ldxfsr, void, env, i64)
-DEF_HELPER_FLAGS_1(fabsd, TCG_CALL_CONST | TCG_CALL_PURE, f64, f64)
+DEF_HELPER_FLAGS_1(fabsd, TCG_CALL_NO_RWG_SE, f64, f64)
 DEF_HELPER_3(fcmps_fcc1, void, env, f32, f32)
 DEF_HELPER_3(fcmps_fcc2, void, env, f32, f32)
 DEF_HELPER_3(fcmps_fcc3, void, env, f32, f32)
@@ -104,14 +104,14 @@ DEF_HELPER_3(fdivs, f32, env, f32, f32)
 DEF_HELPER_3(fsmuld, f64, env, f32, f32)
 DEF_HELPER_3(fdmulq, void, env, f64, f64);
 
-DEF_HELPER_FLAGS_1(fnegs, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32)
+DEF_HELPER_FLAGS_1(fnegs, TCG_CALL_NO_RWG_SE, f32, f32)
 DEF_HELPER_2(fitod, f64, env, s32)
 DEF_HELPER_2(fitoq, void, env, s32)
 
 DEF_HELPER_2(fitos, f32, env, s32)
 
 #ifdef TARGET_SPARC64
-DEF_HELPER_FLAGS_1(fnegd, TCG_CALL_CONST | TCG_CALL_PURE, f64, f64)
+DEF_HELPER_FLAGS_1(fnegd, TCG_CALL_NO_RWG_SE, f64, f64)
 DEF_HELPER_1(fnegq, void, env)
 DEF_HELPER_2(fxtos, f32, env, s64)
 DEF_HELPER_2(fxtod, f64, env, s64)
@@ -131,36 +131,36 @@ DEF_HELPER_2(fstox, s64, env, f32)
 DEF_HELPER_2(fdtox, s64, env, f64)
 DEF_HELPER_1(fqtox, s64, env)
 
-DEF_HELPER_FLAGS_2(fpmerge, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(fmul8x16, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(fmul8x16al, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(fmul8x16au, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(fmul8sux16, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(fmul8ulx16, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(fmuld8sux16, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(fmuld8ulx16, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(fexpand, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_3(pdist, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64, i64)
-DEF_HELPER_FLAGS_2(fpack16, TCG_CALL_CONST | TCG_CALL_PURE, i32, i64, i64)
-DEF_HELPER_FLAGS_3(fpack32, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64, i64)
-DEF_HELPER_FLAGS_2(fpackfix, TCG_CALL_CONST | TCG_CALL_PURE, i32, i64, i64)
-DEF_HELPER_FLAGS_3(bshuffle, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fpmerge, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmul8x16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmul8x16al, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmul8x16au, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmul8sux16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmul8ulx16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmuld8sux16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmuld8ulx16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fexpand, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_3(pdist, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fpack16, TCG_CALL_NO_RWG_SE, i32, i64, i64)
+DEF_HELPER_FLAGS_3(fpack32, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fpackfix, TCG_CALL_NO_RWG_SE, i32, i64, i64)
+DEF_HELPER_FLAGS_3(bshuffle, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
 #define VIS_HELPER(name)                                                 \
-    DEF_HELPER_FLAGS_2(f ## name ## 16, TCG_CALL_CONST | TCG_CALL_PURE,  \
+    DEF_HELPER_FLAGS_2(f ## name ## 16, TCG_CALL_NO_RWG_SE,  \
                        i64, i64, i64)                                    \
-    DEF_HELPER_FLAGS_2(f ## name ## 16s, TCG_CALL_CONST | TCG_CALL_PURE, \
+    DEF_HELPER_FLAGS_2(f ## name ## 16s, TCG_CALL_NO_RWG_SE, \
                        i32, i32, i32)                                    \
-    DEF_HELPER_FLAGS_2(f ## name ## 32, TCG_CALL_CONST | TCG_CALL_PURE,  \
+    DEF_HELPER_FLAGS_2(f ## name ## 32, TCG_CALL_NO_RWG_SE,  \
                        i64, i64, i64)                                    \
-    DEF_HELPER_FLAGS_2(f ## name ## 32s, TCG_CALL_CONST | TCG_CALL_PURE, \
+    DEF_HELPER_FLAGS_2(f ## name ## 32s, TCG_CALL_NO_RWG_SE, \
                        i32, i32, i32)
 
 VIS_HELPER(padd);
 VIS_HELPER(psub);
 #define VIS_CMPHELPER(name)                                              \
-    DEF_HELPER_FLAGS_2(f##name##16, TCG_CALL_CONST | TCG_CALL_PURE,      \
+    DEF_HELPER_FLAGS_2(f##name##16, TCG_CALL_NO_RWG_SE,      \
                        i64, i64, i64)                                    \
-    DEF_HELPER_FLAGS_2(f##name##32, TCG_CALL_CONST | TCG_CALL_PURE,      \
+    DEF_HELPER_FLAGS_2(f##name##32, TCG_CALL_NO_RWG_SE,      \
                        i64, i64, i64)
 VIS_CMPHELPER(cmpgt);
 VIS_CMPHELPER(cmpeq);
commit 76ab687946b31c8ca5308bed1d85391f4f5c599c
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:09 2012 +0200

    target-sh4: rename helper flags
    
    Rename helper flags to the new ones. This is purely a mechanical change,
    it's possible to use better flags by looking at the helpers.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-sh4/helper.h b/target-sh4/helper.h
index 6c1a47d..304b77b 100644
--- a/target-sh4/helper.h
+++ b/target-sh4/helper.h
@@ -19,8 +19,8 @@ DEF_HELPER_3(macw, void, env, i32, i32)
 
 DEF_HELPER_2(ld_fpscr, void, env, i32)
 
-DEF_HELPER_FLAGS_1(fabs_FT, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32)
-DEF_HELPER_FLAGS_1(fabs_DT, TCG_CALL_CONST | TCG_CALL_PURE, f64, f64)
+DEF_HELPER_FLAGS_1(fabs_FT, TCG_CALL_NO_RWG_SE, f32, f32)
+DEF_HELPER_FLAGS_1(fabs_DT, TCG_CALL_NO_RWG_SE, f64, f64)
 DEF_HELPER_3(fadd_FT, f32, env, f32, f32)
 DEF_HELPER_3(fadd_DT, f64, env, f64, f64)
 DEF_HELPER_2(fcnvsd_FT_DT, f64, env, f32)
@@ -37,7 +37,7 @@ DEF_HELPER_2(float_DT, f64, env, i32)
 DEF_HELPER_4(fmac_FT, f32, env, f32, f32, f32)
 DEF_HELPER_3(fmul_FT, f32, env, f32, f32)
 DEF_HELPER_3(fmul_DT, f64, env, f64, f64)
-DEF_HELPER_FLAGS_1(fneg_T, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32)
+DEF_HELPER_FLAGS_1(fneg_T, TCG_CALL_NO_RWG_SE, f32, f32)
 DEF_HELPER_3(fsub_FT, f32, env, f32, f32)
 DEF_HELPER_3(fsub_DT, f64, env, f64, f64)
 DEF_HELPER_2(fsqrt_FT, f32, env, f32)
commit 9995567b3ecef5e0edd7d7301b722949320f89be
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:09 2012 +0200

    target-s390x: rename helper flags
    
    Rename helper flags to the new ones. This is purely a mechanical change,
    it's possible to use better flags by looking at the helpers.
    
    Cc: Alexander Graf <agraf at suse.de>
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-s390x/helper.h b/target-s390x/helper.h
index 5419f37..ac44eab 100644
--- a/target-s390x/helper.h
+++ b/target-s390x/helper.h
@@ -7,21 +7,21 @@ DEF_HELPER_4(xc, i32, env, i32, i64, i64)
 DEF_HELPER_4(mvc, void, env, i32, i64, i64)
 DEF_HELPER_4(clc, i32, env, i32, i64, i64)
 DEF_HELPER_3(mvcl, i32, env, i32, i32)
-DEF_HELPER_FLAGS_1(set_cc_comp_s32, TCG_CALL_PURE|TCG_CALL_CONST, i32, s32)
-DEF_HELPER_FLAGS_1(set_cc_comp_s64, TCG_CALL_PURE|TCG_CALL_CONST, i32, s64)
-DEF_HELPER_FLAGS_2(set_cc_icm, TCG_CALL_PURE|TCG_CALL_CONST, i32, i32, i32)
+DEF_HELPER_FLAGS_1(set_cc_comp_s32, TCG_CALL_NO_RWG_SE, i32, s32)
+DEF_HELPER_FLAGS_1(set_cc_comp_s64, TCG_CALL_NO_RWG_SE, i32, s64)
+DEF_HELPER_FLAGS_2(set_cc_icm, TCG_CALL_NO_RWG_SE, i32, i32, i32)
 DEF_HELPER_4(clm, i32, env, i32, i32, i64)
 DEF_HELPER_4(stcm, void, env, i32, i32, i64)
 DEF_HELPER_3(mlg, void, env, i32, i64)
 DEF_HELPER_3(dlg, void, env, i32, i64)
-DEF_HELPER_FLAGS_3(set_cc_add64, TCG_CALL_PURE|TCG_CALL_CONST, i32, s64, s64, s64)
-DEF_HELPER_FLAGS_3(set_cc_addu64, TCG_CALL_PURE|TCG_CALL_CONST, i32, i64, i64, i64)
-DEF_HELPER_FLAGS_3(set_cc_add32, TCG_CALL_PURE|TCG_CALL_CONST, i32, s32, s32, s32)
-DEF_HELPER_FLAGS_3(set_cc_addu32, TCG_CALL_PURE|TCG_CALL_CONST, i32, i32, i32, i32)
-DEF_HELPER_FLAGS_3(set_cc_sub64, TCG_CALL_PURE|TCG_CALL_CONST, i32, s64, s64, s64)
-DEF_HELPER_FLAGS_3(set_cc_subu64, TCG_CALL_PURE|TCG_CALL_CONST, i32, i64, i64, i64)
-DEF_HELPER_FLAGS_3(set_cc_sub32, TCG_CALL_PURE|TCG_CALL_CONST, i32, s32, s32, s32)
-DEF_HELPER_FLAGS_3(set_cc_subu32, TCG_CALL_PURE|TCG_CALL_CONST, i32, i32, i32, i32)
+DEF_HELPER_FLAGS_3(set_cc_add64, TCG_CALL_NO_RWG_SE, i32, s64, s64, s64)
+DEF_HELPER_FLAGS_3(set_cc_addu64, TCG_CALL_NO_RWG_SE, i32, i64, i64, i64)
+DEF_HELPER_FLAGS_3(set_cc_add32, TCG_CALL_NO_RWG_SE, i32, s32, s32, s32)
+DEF_HELPER_FLAGS_3(set_cc_addu32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
+DEF_HELPER_FLAGS_3(set_cc_sub64, TCG_CALL_NO_RWG_SE, i32, s64, s64, s64)
+DEF_HELPER_FLAGS_3(set_cc_subu64, TCG_CALL_NO_RWG_SE, i32, i64, i64, i64)
+DEF_HELPER_FLAGS_3(set_cc_sub32, TCG_CALL_NO_RWG_SE, i32, s32, s32, s32)
+DEF_HELPER_FLAGS_3(set_cc_subu32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
 DEF_HELPER_4(srst, i32, env, i32, i32, i32)
 DEF_HELPER_4(clst, i32, env, i32, i32, i32)
 DEF_HELPER_4(mvpg, void, env, i64, i64, i64)
@@ -30,15 +30,15 @@ DEF_HELPER_4(csg, i32, env, i32, i64, i32)
 DEF_HELPER_4(cdsg, i32, env, i32, i64, i32)
 DEF_HELPER_4(cs, i32, env, i32, i64, i32)
 DEF_HELPER_5(ex, i32, env, i32, i64, i64, i64)
-DEF_HELPER_FLAGS_1(abs_i32, TCG_CALL_PURE|TCG_CALL_CONST, i32, s32)
-DEF_HELPER_FLAGS_1(nabs_i32, TCG_CALL_PURE|TCG_CALL_CONST, s32, s32)
-DEF_HELPER_FLAGS_1(abs_i64, TCG_CALL_PURE|TCG_CALL_CONST, i64, s64)
-DEF_HELPER_FLAGS_1(nabs_i64, TCG_CALL_PURE|TCG_CALL_CONST, s64, s64)
+DEF_HELPER_FLAGS_1(abs_i32, TCG_CALL_NO_RWG_SE, i32, s32)
+DEF_HELPER_FLAGS_1(nabs_i32, TCG_CALL_NO_RWG_SE, s32, s32)
+DEF_HELPER_FLAGS_1(abs_i64, TCG_CALL_NO_RWG_SE, i64, s64)
+DEF_HELPER_FLAGS_1(nabs_i64, TCG_CALL_NO_RWG_SE, s64, s64)
 DEF_HELPER_4(stcmh, void, env, i32, i64, i32)
 DEF_HELPER_4(icmh, i32, env, i32, i64, i32)
 DEF_HELPER_3(ipm, void, env, i32, i32)
-DEF_HELPER_FLAGS_3(addc_u32, TCG_CALL_PURE|TCG_CALL_CONST, i32, i32, i32, i32)
-DEF_HELPER_FLAGS_3(set_cc_addc_u64, TCG_CALL_PURE|TCG_CALL_CONST, i32, i64, i64, i64)
+DEF_HELPER_FLAGS_3(addc_u32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
+DEF_HELPER_FLAGS_3(set_cc_addc_u64, TCG_CALL_NO_RWG_SE, i32, i64, i64, i64)
 DEF_HELPER_4(stam, void, env, i32, i64, i32)
 DEF_HELPER_4(lam, void, env, i32, i64, i32)
 DEF_HELPER_4(mvcle, i32, env, i32, i64, i32)
@@ -82,9 +82,9 @@ DEF_HELPER_3(seb, void, env, i32, i32)
 DEF_HELPER_3(sdb, i32, env, i32, i64)
 DEF_HELPER_3(mdb, void, env, i32, i64)
 DEF_HELPER_3(ddb, void, env, i32, i64)
-DEF_HELPER_FLAGS_3(cebr, TCG_CALL_PURE, i32, env, i32, i32)
-DEF_HELPER_FLAGS_3(cdbr, TCG_CALL_PURE, i32, env, i32, i32)
-DEF_HELPER_FLAGS_3(cxbr, TCG_CALL_PURE, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(cebr, TCG_CALL_NO_SE, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(cdbr, TCG_CALL_NO_SE, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(cxbr, TCG_CALL_NO_SE, i32, env, i32, i32)
 DEF_HELPER_4(cgebr, i32, env, i32, i32, i32)
 DEF_HELPER_4(cgdbr, i32, env, i32, i32, i32)
 DEF_HELPER_4(cgxbr, i32, env, i32, i32, i32)
@@ -104,12 +104,12 @@ DEF_HELPER_4(madbr, void, env, i32, i32, i32)
 DEF_HELPER_4(msdbr, void, env, i32, i32, i32)
 DEF_HELPER_3(ldeb, void, env, i32, i64)
 DEF_HELPER_3(lxdb, void, env, i32, i64)
-DEF_HELPER_FLAGS_3(tceb, TCG_CALL_PURE, i32, env, i32, i64)
-DEF_HELPER_FLAGS_3(tcdb, TCG_CALL_PURE, i32, env, i32, i64)
-DEF_HELPER_FLAGS_3(tcxb, TCG_CALL_PURE, i32, env, i32, i64)
+DEF_HELPER_FLAGS_3(tceb, TCG_CALL_NO_SE, i32, env, i32, i64)
+DEF_HELPER_FLAGS_3(tcdb, TCG_CALL_NO_SE, i32, env, i32, i64)
+DEF_HELPER_FLAGS_3(tcxb, TCG_CALL_NO_SE, i32, env, i32, i64)
 DEF_HELPER_3(flogr, i32, env, i32, i64)
 DEF_HELPER_3(sqdbr, void, env, i32, i32)
-DEF_HELPER_FLAGS_1(cvd, TCG_CALL_PURE|TCG_CALL_CONST, i64, s32)
+DEF_HELPER_FLAGS_1(cvd, TCG_CALL_NO_RWG_SE, i64, s32)
 DEF_HELPER_4(unpk, void, env, i32, i64, i64)
 DEF_HELPER_4(tr, void, env, i32, i64, i64)
 
@@ -117,36 +117,36 @@ DEF_HELPER_3(servc, i32, env, i32, i64)
 DEF_HELPER_4(diag, i64, env, i32, i64, i64)
 DEF_HELPER_3(load_psw, void, env, i64, i64)
 DEF_HELPER_1(program_interrupt, void, i32)
-DEF_HELPER_FLAGS_2(stidp, TCG_CALL_CONST, void, env, i64)
-DEF_HELPER_FLAGS_2(spx, TCG_CALL_CONST, void, env, i64)
-DEF_HELPER_FLAGS_1(sck, TCG_CALL_CONST, i32, i64)
+DEF_HELPER_FLAGS_2(stidp, TCG_CALL_NO_RWG, void, env, i64)
+DEF_HELPER_FLAGS_2(spx, TCG_CALL_NO_RWG, void, env, i64)
+DEF_HELPER_FLAGS_1(sck, TCG_CALL_NO_RWG, i32, i64)
 DEF_HELPER_2(stck, i32, env, i64)
 DEF_HELPER_2(stcke, i32, env, i64)
-DEF_HELPER_FLAGS_2(sckc, TCG_CALL_CONST, void, env, i64)
-DEF_HELPER_FLAGS_2(stckc, TCG_CALL_CONST, void, env, i64)
-DEF_HELPER_FLAGS_2(spt, TCG_CALL_CONST, void, env, i64)
-DEF_HELPER_FLAGS_2(stpt, TCG_CALL_CONST, void, env, i64)
+DEF_HELPER_FLAGS_2(sckc, TCG_CALL_NO_RWG, void, env, i64)
+DEF_HELPER_FLAGS_2(stckc, TCG_CALL_NO_RWG, void, env, i64)
+DEF_HELPER_FLAGS_2(spt, TCG_CALL_NO_RWG, void, env, i64)
+DEF_HELPER_FLAGS_2(stpt, TCG_CALL_NO_RWG, void, env, i64)
 DEF_HELPER_4(stsi, i32, env, i64, i32, i32)
 DEF_HELPER_4(lctl, void, env, i32, i64, i32)
 DEF_HELPER_4(lctlg, void, env, i32, i64, i32)
 DEF_HELPER_4(stctl, void, env, i32, i64, i32)
 DEF_HELPER_4(stctg, void, env, i32, i64, i32)
-DEF_HELPER_FLAGS_2(tprot, TCG_CALL_CONST, i32, i64, i64)
-DEF_HELPER_FLAGS_2(iske, TCG_CALL_PURE|TCG_CALL_CONST, i64, env, i64)
-DEF_HELPER_FLAGS_3(sske, TCG_CALL_CONST, void, env, i32, i64)
-DEF_HELPER_FLAGS_3(rrbe, TCG_CALL_CONST, i32, env, i32, i64)
+DEF_HELPER_FLAGS_2(tprot, TCG_CALL_NO_RWG, i32, i64, i64)
+DEF_HELPER_FLAGS_2(iske, TCG_CALL_NO_RWG_SE, i64, env, i64)
+DEF_HELPER_FLAGS_3(sske, TCG_CALL_NO_RWG, void, env, i32, i64)
+DEF_HELPER_FLAGS_3(rrbe, TCG_CALL_NO_RWG, i32, env, i32, i64)
 DEF_HELPER_3(csp, i32, env, i32, i32)
 DEF_HELPER_4(mvcs, i32, env, i64, i64, i64)
 DEF_HELPER_4(mvcp, i32, env, i64, i64, i64)
 DEF_HELPER_4(sigp, i32, env, i64, i32, i64)
 DEF_HELPER_2(sacf, void, env, i64)
-DEF_HELPER_FLAGS_3(ipte, TCG_CALL_CONST, void, env, i64, i64)
-DEF_HELPER_FLAGS_1(ptlb, TCG_CALL_CONST, void, env)
+DEF_HELPER_FLAGS_3(ipte, TCG_CALL_NO_RWG, void, env, i64, i64)
+DEF_HELPER_FLAGS_1(ptlb, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_3(lra, i32, env, i64, i32)
 DEF_HELPER_3(stura, void, env, i64, i32)
 DEF_HELPER_3(cksm, void, env, i32, i32)
 
-DEF_HELPER_FLAGS_5(calc_cc, TCG_CALL_PURE|TCG_CALL_CONST,
+DEF_HELPER_FLAGS_5(calc_cc, TCG_CALL_NO_RWG_SE,
                    i32, env, i32, i64, i64, i64)
 
 #include "def-helper.h"
commit 8f885ef88a3fa26c26e98150219884da607d46ed
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:09 2012 +0200

    target-ppc: rename helper flags
    
    Rename helper flags to the new ones. This is purely a mechanical change,
    it's possible to use better flags by looking at the helpers.
    
    Cc: Alexander Graf <agraf at suse.de>
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index fd04c06..e588370 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -31,24 +31,24 @@ DEF_HELPER_2(icbi, void, env, tl)
 DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32)
 
 #if defined(TARGET_PPC64)
-DEF_HELPER_FLAGS_2(mulhd, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(mulhdu, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(mulhd, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(mulhdu, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_3(mulldo, i64, env, i64, i64)
 #endif
 
-DEF_HELPER_FLAGS_1(cntlzw, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
-DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
-DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
+DEF_HELPER_FLAGS_1(cntlzw, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_3(sraw, tl, env, tl, tl)
 #if defined(TARGET_PPC64)
-DEF_HELPER_FLAGS_1(cntlzd, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
-DEF_HELPER_FLAGS_1(popcntd, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
+DEF_HELPER_FLAGS_1(cntlzd, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(popcntd, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_3(srad, tl, env, tl, tl)
 #endif
 
-DEF_HELPER_FLAGS_1(cntlsw32, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
-DEF_HELPER_FLAGS_1(cntlzw32, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
-DEF_HELPER_FLAGS_2(brinc, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl, tl)
+DEF_HELPER_FLAGS_1(cntlsw32, TCG_CALL_NO_RWG_SE, i32, i32)
+DEF_HELPER_FLAGS_1(cntlzw32, TCG_CALL_NO_RWG_SE, i32, i32)
+DEF_HELPER_FLAGS_2(brinc, TCG_CALL_NO_RWG_SE, tl, tl, tl)
 
 DEF_HELPER_1(float_check_status, void, env)
 DEF_HELPER_1(reset_fpstatus, void, env)
@@ -345,25 +345,25 @@ DEF_HELPER_2(6xx_tlbd, void, env, tl)
 DEF_HELPER_2(6xx_tlbi, void, env, tl)
 DEF_HELPER_2(74xx_tlbd, void, env, tl)
 DEF_HELPER_2(74xx_tlbi, void, env, tl)
-DEF_HELPER_FLAGS_1(tlbia, TCG_CALL_CONST, void, env)
-DEF_HELPER_FLAGS_2(tlbie, TCG_CALL_CONST, void, env, tl)
+DEF_HELPER_FLAGS_1(tlbia, TCG_CALL_NO_RWG, void, env)
+DEF_HELPER_FLAGS_2(tlbie, TCG_CALL_NO_RWG, void, env, tl)
 #if defined(TARGET_PPC64)
-DEF_HELPER_FLAGS_3(store_slb, TCG_CALL_CONST, void, env, tl, tl)
+DEF_HELPER_FLAGS_3(store_slb, TCG_CALL_NO_RWG, void, env, tl, tl)
 DEF_HELPER_2(load_slb_esid, tl, env, tl)
 DEF_HELPER_2(load_slb_vsid, tl, env, tl)
-DEF_HELPER_FLAGS_1(slbia, TCG_CALL_CONST, void, env)
-DEF_HELPER_FLAGS_2(slbie, TCG_CALL_CONST, void, env, tl)
+DEF_HELPER_FLAGS_1(slbia, TCG_CALL_NO_RWG, void, env)
+DEF_HELPER_FLAGS_2(slbie, TCG_CALL_NO_RWG, void, env, tl)
 #endif
-DEF_HELPER_FLAGS_2(load_sr, TCG_CALL_CONST, tl, env, tl);
-DEF_HELPER_FLAGS_3(store_sr, TCG_CALL_CONST, void, env, tl, tl)
+DEF_HELPER_FLAGS_2(load_sr, TCG_CALL_NO_RWG, tl, env, tl);
+DEF_HELPER_FLAGS_3(store_sr, TCG_CALL_NO_RWG, void, env, tl, tl)
 
-DEF_HELPER_FLAGS_1(602_mfrom, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
+DEF_HELPER_FLAGS_1(602_mfrom, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_1(msgsnd, void, tl)
 DEF_HELPER_2(msgclr, void, env, tl)
 #endif
 
 DEF_HELPER_4(dlmzb, tl, env, tl, tl, i32)
-DEF_HELPER_FLAGS_2(clcs, TCG_CALL_CONST | TCG_CALL_PURE, tl, env, i32)
+DEF_HELPER_FLAGS_2(clcs, TCG_CALL_NO_RWG_SE, tl, env, i32)
 #if !defined(CONFIG_USER_ONLY)
 DEF_HELPER_2(rac, tl, env, tl)
 #endif
commit 95bf787e40adead611f79cae6e8a57cc175df95e
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:09 2012 +0200

    target-mips: rename helper flags
    
    Rename helper flags to the new ones. This is purely a mechanical change,
    it's possible to use better flags by looking at the helpers.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-mips/helper.h b/target-mips/helper.h
index f35ed78..43ac39f 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -23,11 +23,11 @@ DEF_HELPER_4(scd, tl, env, tl, tl, int)
 #endif
 #endif
 
-DEF_HELPER_FLAGS_1(clo, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
-DEF_HELPER_FLAGS_1(clz, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
+DEF_HELPER_FLAGS_1(clo, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, tl, tl)
 #ifdef TARGET_MIPS64
-DEF_HELPER_FLAGS_1(dclo, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
-DEF_HELPER_FLAGS_1(dclz, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
+DEF_HELPER_FLAGS_1(dclo, TCG_CALL_NO_RWG_SE, tl, tl)
+DEF_HELPER_FLAGS_1(dclz, TCG_CALL_NO_RWG_SE, tl, tl)
 DEF_HELPER_3(dmult, void, env, tl, tl)
 DEF_HELPER_3(dmultu, void, env, tl, tl)
 #endif
@@ -304,62 +304,62 @@ DEF_HELPER_2(pmon, void, env, int)
 DEF_HELPER_1(wait, void, env)
 
 /* Loongson multimedia functions.  */
-DEF_HELPER_FLAGS_2(paddsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(paddush, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(paddh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(paddw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(paddsb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(paddusb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(paddb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(paddsh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(paddush, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(paddh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(paddw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(paddsb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(paddusb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(paddb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 
-DEF_HELPER_FLAGS_2(psubsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(psubush, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(psubh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(psubw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(psubsb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(psubusb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(psubb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psubsh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psubush, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psubh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psubw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psubsb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psubusb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psubb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 
-DEF_HELPER_FLAGS_2(pshufh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(packsswh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(packsshb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(packushb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pshufh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(packsswh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(packsshb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(packushb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 
-DEF_HELPER_FLAGS_2(punpcklhw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(punpckhhw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(punpcklbh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(punpckhbh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(punpcklwd, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(punpckhwd, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(punpcklhw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(punpckhhw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(punpcklbh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(punpckhbh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(punpcklwd, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(punpckhwd, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 
-DEF_HELPER_FLAGS_2(pavgh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(pavgb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(pmaxsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(pminsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(pmaxub, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(pminub, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pavgh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pavgb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pmaxsh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pminsh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pmaxub, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pminub, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 
-DEF_HELPER_FLAGS_2(pcmpeqw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(pcmpgtw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(pcmpeqh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(pcmpgth, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(pcmpeqb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(pcmpgtb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pcmpeqw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pcmpgtw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pcmpeqh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pcmpgth, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pcmpeqb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pcmpgtb, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 
-DEF_HELPER_FLAGS_2(psllw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(psllh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(psrlw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(psrlh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(psraw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(psrah, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psllw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psllh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psrlw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psrlh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psraw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psrah, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 
-DEF_HELPER_FLAGS_2(pmullh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(pmulhh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(pmulhuh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(pmaddhw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pmullh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pmulhh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pmulhuh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pmaddhw, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 
-DEF_HELPER_FLAGS_2(pasubub, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_1(biadd, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
-DEF_HELPER_FLAGS_1(pmovmskb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
+DEF_HELPER_FLAGS_2(pasubub, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_1(biadd, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(pmovmskb, TCG_CALL_NO_RWG_SE, i64, i64)
 
 #include "def-helper.h"
commit 2a997687b867d3fa005ae6abca01121e68e5c02e
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:09 2012 +0200

    target-microblaze: rename helper flags
    
    Rename helper flags to the new ones. This is purely a mechanical change,
    it's possible to use better flags by looking at the helpers.
    
    Acked-by: Edgar E. Iglesias <edgar.iglesias at gmail.com>
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-microblaze/helper.h b/target-microblaze/helper.h
index a1a732c..a667122 100644
--- a/target-microblaze/helper.h
+++ b/target-microblaze/helper.h
@@ -2,10 +2,10 @@
 
 DEF_HELPER_2(raise_exception, void, env, i32)
 DEF_HELPER_1(debug, void, env)
-DEF_HELPER_FLAGS_3(carry, TCG_CALL_PURE | TCG_CALL_CONST, i32, i32, i32, i32)
+DEF_HELPER_FLAGS_3(carry, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
 DEF_HELPER_2(cmp, i32, i32, i32)
 DEF_HELPER_2(cmpu, i32, i32, i32)
-DEF_HELPER_FLAGS_1(clz, TCG_CALL_PURE | TCG_CALL_CONST, i32, i32)
+DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, i32, i32)
 
 DEF_HELPER_3(divs, i32, env, i32, i32)
 DEF_HELPER_3(divu, i32, env, i32, i32)
@@ -26,7 +26,7 @@ DEF_HELPER_3(fcmp_gt, i32, env, i32, i32)
 DEF_HELPER_3(fcmp_ne, i32, env, i32, i32)
 DEF_HELPER_3(fcmp_ge, i32, env, i32, i32)
 
-DEF_HELPER_FLAGS_2(pcmpbf, TCG_CALL_PURE | TCG_CALL_CONST, i32, i32, i32)
+DEF_HELPER_FLAGS_2(pcmpbf, TCG_CALL_NO_RWG_SE, i32, i32, i32)
 #if !defined(CONFIG_USER_ONLY)
 DEF_HELPER_2(mmu_read, i32, env, i32)
 DEF_HELPER_3(mmu_write, void, env, i32, i32)
commit 95b638a29214173d4de04567b8d08f4064580abd
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:08 2012 +0200

    target-i386: rename helper flags
    
    Rename helper flags to the new ones. This is purely a mechanical change,
    it's possible to use better flags by looking at the helpers.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-i386/helper.h b/target-i386/helper.h
index 93850ce..970fcd9 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -1,7 +1,7 @@
 #include "def-helper.h"
 
-DEF_HELPER_FLAGS_2(cc_compute_all, TCG_CALL_PURE, i32, env, int)
-DEF_HELPER_FLAGS_2(cc_compute_c, TCG_CALL_PURE, i32, env, int)
+DEF_HELPER_FLAGS_2(cc_compute_all, TCG_CALL_NO_SE, i32, env, int)
+DEF_HELPER_FLAGS_2(cc_compute_c, TCG_CALL_NO_SE, i32, env, int)
 
 DEF_HELPER_0(lock, void)
 DEF_HELPER_0(unlock, void)
commit 3e2bed86a24e13970bcc50527e21fe75f409f7ad
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:08 2012 +0200

    target-cris: rename helper flags
    
    Rename helper flags to the new ones. This is purely a mechanical change,
    it's possible to use better flags by looking at the helpers.
    
    Acked-by: Edgar E. Iglesias <edgar.iglesias at gmail.com>
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-cris/helper.h b/target-cris/helper.h
index 99fb326..fe12083 100644
--- a/target-cris/helper.h
+++ b/target-cris/helper.h
@@ -10,19 +10,19 @@ DEF_HELPER_1(rfn, void, env);
 DEF_HELPER_3(movl_sreg_reg, void, env, i32, i32)
 DEF_HELPER_3(movl_reg_sreg, void, env, i32, i32)
 
-DEF_HELPER_FLAGS_1(lz, TCG_CALL_PURE, i32, i32);
-DEF_HELPER_FLAGS_4(btst, TCG_CALL_PURE, i32, env, i32, i32, i32);
+DEF_HELPER_FLAGS_1(lz, TCG_CALL_NO_SE, i32, i32);
+DEF_HELPER_FLAGS_4(btst, TCG_CALL_NO_SE, i32, env, i32, i32, i32);
 
-DEF_HELPER_FLAGS_4(evaluate_flags_muls, TCG_CALL_PURE, i32, env, i32, i32, i32)
-DEF_HELPER_FLAGS_4(evaluate_flags_mulu, TCG_CALL_PURE, i32, env, i32, i32, i32)
-DEF_HELPER_FLAGS_5(evaluate_flags_mcp, TCG_CALL_PURE, i32, env,
+DEF_HELPER_FLAGS_4(evaluate_flags_muls, TCG_CALL_NO_SE, i32, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(evaluate_flags_mulu, TCG_CALL_NO_SE, i32, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(evaluate_flags_mcp, TCG_CALL_NO_SE, i32, env,
                                                       i32, i32, i32, i32)
-DEF_HELPER_FLAGS_5(evaluate_flags_alu_4, TCG_CALL_PURE, i32, env,
+DEF_HELPER_FLAGS_5(evaluate_flags_alu_4, TCG_CALL_NO_SE, i32, env,
                                                         i32, i32, i32, i32)
-DEF_HELPER_FLAGS_5(evaluate_flags_sub_4, TCG_CALL_PURE, i32, env,
+DEF_HELPER_FLAGS_5(evaluate_flags_sub_4, TCG_CALL_NO_SE, i32, env,
                                                         i32, i32, i32, i32)
-DEF_HELPER_FLAGS_3(evaluate_flags_move_4, TCG_CALL_PURE, i32, env, i32, i32)
-DEF_HELPER_FLAGS_3(evaluate_flags_move_2, TCG_CALL_PURE, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(evaluate_flags_move_4, TCG_CALL_NO_SE, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(evaluate_flags_move_2, TCG_CALL_NO_SE, i32, env, i32, i32)
 DEF_HELPER_1(evaluate_flags, void, env)
 DEF_HELPER_1(top_evaluate_flags, void, env)
 
commit 4b3d4829abfdeb5078195898997e73724898c41e
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:08 2012 +0200

    target-arm: rename helper flags
    
    Rename helper flags to the new ones. This is purely a mechanical change,
    it's possible to use better flags by looking at the helpers.
    
    Cc: Paul Brook <paul at codesourcery.com>
    Cc: Peter Maydell <peter.maydell at linaro.org>
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-arm/helper.h b/target-arm/helper.h
index 60812de..3d23ceb 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -1,8 +1,8 @@
 #include "def-helper.h"
 
-DEF_HELPER_FLAGS_1(clz, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
-DEF_HELPER_FLAGS_1(sxtb16, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
-DEF_HELPER_FLAGS_1(uxtb16, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
+DEF_HELPER_FLAGS_1(clz, TCG_CALL_NO_RWG_SE, i32, i32)
+DEF_HELPER_FLAGS_1(sxtb16, TCG_CALL_NO_RWG_SE, i32, i32)
+DEF_HELPER_FLAGS_1(uxtb16, TCG_CALL_NO_RWG_SE, i32, i32)
 
 DEF_HELPER_3(add_setq, i32, env, i32, i32)
 DEF_HELPER_3(add_saturate, i32, env, i32, i32)
@@ -10,9 +10,9 @@ DEF_HELPER_3(sub_saturate, i32, env, i32, i32)
 DEF_HELPER_3(add_usaturate, i32, env, i32, i32)
 DEF_HELPER_3(sub_usaturate, i32, env, i32, i32)
 DEF_HELPER_2(double_saturate, i32, env, s32)
-DEF_HELPER_FLAGS_2(sdiv, TCG_CALL_CONST | TCG_CALL_PURE, s32, s32, s32)
-DEF_HELPER_FLAGS_2(udiv, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32, i32)
-DEF_HELPER_FLAGS_1(rbit, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
+DEF_HELPER_FLAGS_2(sdiv, TCG_CALL_NO_RWG_SE, s32, s32, s32)
+DEF_HELPER_FLAGS_2(udiv, TCG_CALL_NO_RWG_SE, i32, i32, i32)
+DEF_HELPER_FLAGS_1(rbit, TCG_CALL_NO_RWG_SE, i32, i32)
 
 #define PAS_OP(pfx)  \
     DEF_HELPER_3(pfx ## add8, i32, i32, i32, ptr) \
@@ -44,11 +44,11 @@ DEF_HELPER_3(usat, i32, env, i32, i32)
 DEF_HELPER_3(ssat16, i32, env, i32, i32)
 DEF_HELPER_3(usat16, i32, env, i32, i32)
 
-DEF_HELPER_FLAGS_2(usad8, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32, i32)
+DEF_HELPER_FLAGS_2(usad8, TCG_CALL_NO_RWG_SE, i32, i32, i32)
 
 DEF_HELPER_1(logicq_cc, i32, i64)
 
-DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_CONST | TCG_CALL_PURE,
+DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_NO_RWG_SE,
                    i32, i32, i32, i32)
 DEF_HELPER_2(exception, void, env, i32)
 DEF_HELPER_1(wfi, void, env)
commit 0c0dcdfed0833539ed03560d251c31463c0eb9f9
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:08 2012 +0200

    target-alpha: rename helper flags
    
    Rename helper flags to the new ones. This is purely a mechanical change,
    it's possible to use better flags by looking at the helpers.
    
    Acked-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/target-alpha/helper.h b/target-alpha/helper.h
index a184def..162816f 100644
--- a/target-alpha/helper.h
+++ b/target-alpha/helper.h
@@ -1,7 +1,7 @@
 #include "def-helper.h"
 
 DEF_HELPER_3(excp, noreturn, env, int, int)
-DEF_HELPER_FLAGS_1(load_pcc, TCG_CALL_CONST | TCG_CALL_PURE, i64, env)
+DEF_HELPER_FLAGS_1(load_pcc, TCG_CALL_NO_RWG_SE, i64, env)
 
 DEF_HELPER_3(addqv, i64, env, i64, i64)
 DEF_HELPER_3(addlv, i64, env, i64, i64)
@@ -9,89 +9,89 @@ DEF_HELPER_3(subqv, i64, env, i64, i64)
 DEF_HELPER_3(sublv, i64, env, i64, i64)
 DEF_HELPER_3(mullv, i64, env, i64, i64)
 DEF_HELPER_3(mulqv, i64, env, i64, i64)
-DEF_HELPER_FLAGS_2(umulh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-
-DEF_HELPER_FLAGS_1(ctpop, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
-DEF_HELPER_FLAGS_1(ctlz, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
-DEF_HELPER_FLAGS_1(cttz, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
-
-DEF_HELPER_FLAGS_2(zap, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(zapnot, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-
-DEF_HELPER_FLAGS_2(cmpbge, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-
-DEF_HELPER_FLAGS_2(minub8, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(minsb8, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(minuw4, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(minsw4, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(maxub8, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(maxsb8, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(maxuw4, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(maxsw4, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(perr, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
-DEF_HELPER_FLAGS_1(pklb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
-DEF_HELPER_FLAGS_1(pkwb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
-DEF_HELPER_FLAGS_1(unpkbl, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
-DEF_HELPER_FLAGS_1(unpkbw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
-
-DEF_HELPER_FLAGS_1(load_fpcr, TCG_CALL_CONST | TCG_CALL_PURE, i64, env)
-DEF_HELPER_FLAGS_2(store_fpcr, TCG_CALL_CONST, void, env, i64)
-
-DEF_HELPER_FLAGS_1(f_to_memory, TCG_CALL_CONST | TCG_CALL_PURE, i32, i64)
-DEF_HELPER_FLAGS_1(memory_to_f, TCG_CALL_CONST | TCG_CALL_PURE, i64, i32)
-DEF_HELPER_FLAGS_3(addf, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(subf, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(mulf, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(divf, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_2(sqrtf, TCG_CALL_CONST, i64, env, i64)
-
-DEF_HELPER_FLAGS_1(g_to_memory, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
-DEF_HELPER_FLAGS_1(memory_to_g, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
-DEF_HELPER_FLAGS_3(addg, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(subg, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(mulg, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(divg, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_2(sqrtg, TCG_CALL_CONST, i64, env, i64)
-
-DEF_HELPER_FLAGS_1(s_to_memory, TCG_CALL_CONST | TCG_CALL_PURE, i32, i64)
-DEF_HELPER_FLAGS_1(memory_to_s, TCG_CALL_CONST | TCG_CALL_PURE, i64, i32)
-DEF_HELPER_FLAGS_3(adds, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(subs, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(muls, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(divs, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_2(sqrts, TCG_CALL_CONST, i64, env, i64)
-
-DEF_HELPER_FLAGS_3(addt, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(subt, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(mult, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(divt, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_2(sqrtt, TCG_CALL_CONST, i64, env, i64)
-
-DEF_HELPER_FLAGS_3(cmptun, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(cmpteq, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(cmptle, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(cmptlt, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(cmpgeq, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(cmpgle, TCG_CALL_CONST, i64, env, i64, i64)
-DEF_HELPER_FLAGS_3(cmpglt, TCG_CALL_CONST, i64, env, i64, i64)
-
-DEF_HELPER_FLAGS_2(cvtts, TCG_CALL_CONST, i64, env, i64)
-DEF_HELPER_FLAGS_2(cvtst, TCG_CALL_CONST, i64, env, i64)
-DEF_HELPER_FLAGS_2(cvtqs, TCG_CALL_CONST, i64, env, i64)
-DEF_HELPER_FLAGS_2(cvtqt, TCG_CALL_CONST, i64, env, i64)
-DEF_HELPER_FLAGS_2(cvtqf, TCG_CALL_CONST, i64, env, i64)
-DEF_HELPER_FLAGS_2(cvtgf, TCG_CALL_CONST, i64, env, i64)
-DEF_HELPER_FLAGS_2(cvtgq, TCG_CALL_CONST, i64, env, i64)
-DEF_HELPER_FLAGS_2(cvtqg, TCG_CALL_CONST, i64, env, i64)
-
-DEF_HELPER_FLAGS_2(cvttq, TCG_CALL_CONST, i64, env, i64)
-DEF_HELPER_FLAGS_2(cvttq_c, TCG_CALL_CONST, i64, env, i64)
-DEF_HELPER_FLAGS_2(cvttq_svic, TCG_CALL_CONST, i64, env, i64)
-
-DEF_HELPER_FLAGS_2(setroundmode, TCG_CALL_CONST, void, env, i32)
-DEF_HELPER_FLAGS_2(setflushzero, TCG_CALL_CONST, void, env, i32)
-DEF_HELPER_FLAGS_1(fp_exc_clear, TCG_CALL_CONST, void, env)
-DEF_HELPER_FLAGS_1(fp_exc_get, TCG_CALL_CONST | TCG_CALL_PURE, i32, env)
+DEF_HELPER_FLAGS_2(umulh, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_1(ctpop, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(ctlz, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(cttz, TCG_CALL_NO_RWG_SE, i64, i64)
+
+DEF_HELPER_FLAGS_2(zap, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(zapnot, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(cmpbge, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(minub8, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(minsb8, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(minuw4, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(minsw4, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(maxub8, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(maxsb8, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(maxuw4, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(maxsw4, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(perr, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_1(pklb, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(pkwb, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(unpkbl, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(unpkbw, TCG_CALL_NO_RWG_SE, i64, i64)
+
+DEF_HELPER_FLAGS_1(load_fpcr, TCG_CALL_NO_RWG_SE, i64, env)
+DEF_HELPER_FLAGS_2(store_fpcr, TCG_CALL_NO_RWG, void, env, i64)
+
+DEF_HELPER_FLAGS_1(f_to_memory, TCG_CALL_NO_RWG_SE, i32, i64)
+DEF_HELPER_FLAGS_1(memory_to_f, TCG_CALL_NO_RWG_SE, i64, i32)
+DEF_HELPER_FLAGS_3(addf, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(subf, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(mulf, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(divf, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_2(sqrtf, TCG_CALL_NO_RWG, i64, env, i64)
+
+DEF_HELPER_FLAGS_1(g_to_memory, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(memory_to_g, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_3(addg, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(subg, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(mulg, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(divg, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_2(sqrtg, TCG_CALL_NO_RWG, i64, env, i64)
+
+DEF_HELPER_FLAGS_1(s_to_memory, TCG_CALL_NO_RWG_SE, i32, i64)
+DEF_HELPER_FLAGS_1(memory_to_s, TCG_CALL_NO_RWG_SE, i64, i32)
+DEF_HELPER_FLAGS_3(adds, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(subs, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(muls, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(divs, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_2(sqrts, TCG_CALL_NO_RWG, i64, env, i64)
+
+DEF_HELPER_FLAGS_3(addt, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(subt, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(mult, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(divt, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_2(sqrtt, TCG_CALL_NO_RWG, i64, env, i64)
+
+DEF_HELPER_FLAGS_3(cmptun, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(cmpteq, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(cmptle, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(cmptlt, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(cmpgeq, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(cmpgle, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(cmpglt, TCG_CALL_NO_RWG, i64, env, i64, i64)
+
+DEF_HELPER_FLAGS_2(cvtts, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(cvtst, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(cvtqs, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(cvtqt, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(cvtqf, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(cvtgf, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(cvtgq, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(cvtqg, TCG_CALL_NO_RWG, i64, env, i64)
+
+DEF_HELPER_FLAGS_2(cvttq, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(cvttq_c, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(cvttq_svic, TCG_CALL_NO_RWG, i64, env, i64)
+
+DEF_HELPER_FLAGS_2(setroundmode, TCG_CALL_NO_RWG, void, env, i32)
+DEF_HELPER_FLAGS_2(setflushzero, TCG_CALL_NO_RWG, void, env, i32)
+DEF_HELPER_FLAGS_1(fp_exc_clear, TCG_CALL_NO_RWG, void, env)
+DEF_HELPER_FLAGS_1(fp_exc_get, TCG_CALL_NO_RWG_SE, i32, env)
 DEF_HELPER_3(fp_exc_raise, void, env, i32, i32)
 DEF_HELPER_3(fp_exc_raise_s, void, env, i32, i32)
 
@@ -110,13 +110,13 @@ DEF_HELPER_2(stq_phys, void, i64, i64)
 DEF_HELPER_3(stl_c_phys, i64, env, i64, i64)
 DEF_HELPER_3(stq_c_phys, i64, env, i64, i64)
 
-DEF_HELPER_FLAGS_1(tbia, TCG_CALL_CONST, void, env)
-DEF_HELPER_FLAGS_2(tbis, TCG_CALL_CONST, void, env, i64)
+DEF_HELPER_FLAGS_1(tbia, TCG_CALL_NO_RWG, void, env)
+DEF_HELPER_FLAGS_2(tbis, TCG_CALL_NO_RWG, void, env, i64)
 
 DEF_HELPER_1(halt, void, i64);
 
-DEF_HELPER_FLAGS_0(get_time, TCG_CALL_CONST, i64)
-DEF_HELPER_FLAGS_2(set_alarm, TCG_CALL_CONST, void, env, i64)
+DEF_HELPER_FLAGS_0(get_time, TCG_CALL_NO_RWG, i64)
+DEF_HELPER_FLAGS_2(set_alarm, TCG_CALL_NO_RWG, void, env, i64)
 #endif
 
 #include "def-helper.h"
commit 78505279665e64d28e4a308818b69e6f77ecc5d2
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:08 2012 +0200

    tcg: rework TCG helper flags
    
    The current helper flags, TCG_CALL_CONST and TCG_CALL_PURE might be
    confusing and doesn't provide enough granularity for some helpers (FP
    helpers for example).
    
    This patch changes them into the following helpers flags:
    - TCG_CALL_NO_READ_GLOBALS means that the helper does not read globals,
      either directly or via an exception. They will not be saved to their
      canonical location before calling the helper.
    - TCG_CALL_NO_WRITE_GLOBALS means that the helper does not modify any
      globals. They will only be saved to their canonical locations before
      calling helpers, but they won't be reloaded afterwise.
    - TCG_CALL_NO_SIDE_EFFECTS means that the call to the function is
      removed if the return value is not used.
    
    It provides convenience flags, to avoid helper definitions longer than
    80 characters. It also provides compatibility flags, and updates the
    documentation.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/README b/tcg/README
index 9d1b100..ec1ac79 100644
--- a/tcg/README
+++ b/tcg/README
@@ -77,11 +77,20 @@ destroyed, but local temporaries and globals are preserved.
 Using the tcg_gen_helper_x_y it is possible to call any function
 taking i32, i64 or pointer types. By default, before calling a helper,
 all globals are stored at their canonical location and it is assumed
-that the function can modify them. This can be overridden by the
-TCG_CALL_CONST function modifier. By default, the helper is allowed to
-modify the CPU state or raise an exception. This can be overridden by
-the TCG_CALL_PURE function modifier, in which case the call to the
-function is removed if the return value is not used.
+that the function can modify them. By default, the helper is allowed to
+modify the CPU state or raise an exception.
+
+This can be overridden using the following function modifiers:
+- TCG_CALL_NO_READ_GLOBALS means that the helper does not read globals,
+  either directly or via an exception. They will not be saved to their
+  canonical locations before calling the helper.
+- TCG_CALL_NO_WRITE_GLOBALS means that the helper does not modify any globals.
+  They will only be saved to their canonical location before calling helpers,
+  but they won't be reloaded afterwise.
+- TCG_CALL_NO_SIDE_EFFECTS means that the call to the function is removed if
+  the return value is not used.
+
+Note that TCG_CALL_NO_READ_GLOBALS implies TCG_CALL_NO_WRITE_GLOBALS.
 
 On some TCG targets (e.g. x86), several calling conventions are
 supported.
diff --git a/tcg/optimize.c b/tcg/optimize.c
index a06c8eb..8e5d918 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -915,7 +915,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
 
         case INDEX_op_call:
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
-            if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
+            if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS |
+                                            TCG_CALL_NO_WRITE_GLOBALS))) {
                 for (i = 0; i < nb_globals; i++) {
                     reset_temp(i);
                 }
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 8100a5a..8d1da2b 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -401,10 +401,10 @@ static inline void tcg_gen_helperN(void *func, int flags, int sizemask,
 }
 
 /* Note: Both tcg_gen_helper32() and tcg_gen_helper64() are currently
-   reserved for helpers in tcg-runtime.c. These helpers are all const
-   and pure, hence the call to tcg_gen_callN() with TCG_CALL_CONST |
-   TCG_CALL_PURE. This may need to be adjusted if these functions
-   start to be used with other helpers. */
+   reserved for helpers in tcg-runtime.c. These helpers all do not read
+   globals and do not have side effects, hence the call to tcg_gen_callN()
+   with TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS. This may need
+   to be adjusted if these functions start to be used with other helpers. */
 static inline void tcg_gen_helper32(void *func, int sizemask, TCGv_i32 ret,
                                     TCGv_i32 a, TCGv_i32 b)
 {
@@ -413,8 +413,9 @@ static inline void tcg_gen_helper32(void *func, int sizemask, TCGv_i32 ret,
     fn = tcg_const_ptr(func);
     args[0] = GET_TCGV_I32(a);
     args[1] = GET_TCGV_I32(b);
-    tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask,
-                  GET_TCGV_I32(ret), 2, args);
+    tcg_gen_callN(&tcg_ctx, fn,
+                  TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS,
+                  sizemask, GET_TCGV_I32(ret), 2, args);
     tcg_temp_free_ptr(fn);
 }
 
@@ -426,8 +427,9 @@ static inline void tcg_gen_helper64(void *func, int sizemask, TCGv_i64 ret,
     fn = tcg_const_ptr(func);
     args[0] = GET_TCGV_I64(a);
     args[1] = GET_TCGV_I64(b);
-    tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask,
-                  GET_TCGV_I64(ret), 2, args);
+    tcg_gen_callN(&tcg_ctx, fn,
+                  TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS,
+                  sizemask, GET_TCGV_I64(ret), 2, args);
     tcg_temp_free_ptr(fn);
 }
 
diff --git a/tcg/tcg.c b/tcg/tcg.c
index c9c6423..c3a7f19 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1251,7 +1251,7 @@ static void tcg_liveness_analysis(TCGContext *s)
 
                 /* pure functions can be removed if their result is not
                    used */
-                if (call_flags & TCG_CALL_PURE) {
+                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                     for(i = 0; i < nb_oargs; i++) {
                         arg = args[i];
                         if (!dead_temps[arg] || mem_temps[arg]) {
@@ -1277,11 +1277,15 @@ static void tcg_liveness_analysis(TCGContext *s)
                         dead_temps[arg] = 1;
                         mem_temps[arg] = 0;
                     }
-                    
-                    if (!(call_flags & TCG_CALL_CONST)) {
+
+                    if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
+                        /* globals should be synced to memory */
+                        memset(mem_temps, 1, s->nb_globals);
+                    }
+                    if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
+                                        TCG_CALL_NO_READ_GLOBALS))) {
                         /* globals should go back to memory */
                         memset(dead_temps, 1, s->nb_globals);
-                        memset(mem_temps, 1, s->nb_globals);
                     }
 
                     /* input args are live */
@@ -2128,10 +2132,14 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
             tcg_reg_free(s, reg);
         }
     }
-    
-    /* store globals and free associated registers (we assume the call
-       can modify any global. */
-    if (!(flags & TCG_CALL_CONST)) {
+
+    /* Save globals if they might be written by the helper, sync them if
+       they might be read. */
+    if (flags & TCG_CALL_NO_READ_GLOBALS) {
+        /* Nothing to do */
+    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
+        sync_globals(s, allocated_regs);
+    } else {
         save_globals(s, allocated_regs);
     }
 
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 4b44ecd..96b6222 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -253,14 +253,24 @@ typedef int TCGv_i64;
 #define TCGV_UNUSED_I64(x) x = MAKE_TCGV_I64(-1)
 
 /* call flags */
-/* A pure function only reads its arguments and TCG global variables
-   and cannot raise exceptions. Hence a call to a pure function can be
-   safely suppressed if the return value is not used. */
-#define TCG_CALL_PURE           0x0010 
-/* A const function only reads its arguments and does not use TCG
-   global variables. Hence a call to such a function does not
-   save TCG global variables back to their canonical location. */
-#define TCG_CALL_CONST          0x0020
+/* Helper does not read globals (either directly or through an exception). It
+   implies TCG_CALL_NO_WRITE_GLOBALS. */
+#define TCG_CALL_NO_READ_GLOBALS    0x0010
+/* Helper does not write globals */
+#define TCG_CALL_NO_WRITE_GLOBALS   0x0020
+/* Helper can be safely suppressed if the return value is not used. */
+#define TCG_CALL_NO_SIDE_EFFECTS    0x0040
+
+/* convenience version of most used call flags */
+#define TCG_CALL_NO_RWG         TCG_CALL_NO_READ_GLOBALS
+#define TCG_CALL_NO_WG          TCG_CALL_NO_WRITE_GLOBALS
+#define TCG_CALL_NO_SE          TCG_CALL_NO_SIDE_EFFECTS
+#define TCG_CALL_NO_RWG_SE      (TCG_CALL_NO_RWG | TCG_CALL_NO_SE)
+#define TCG_CALL_NO_WG_SE       (TCG_CALL_NO_WG | TCG_CALL_NO_SE)
+
+/* compatibility call flags, they should be eventually be removed */
+#define TCG_CALL_PURE           TCG_CALL_NO_SIDE_EFFECTS
+#define TCG_CALL_CONST          TCG_CALL_NO_READ_GLOBALS
 
 /* used to align parameters */
 #define TCG_CALL_DUMMY_TCGV     MAKE_TCGV_I32(-1)
commit 3d5c5f876d171983d614925eebe4f0edccbb75be
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:08 2012 +0200

    tcg: synchronize globals for ops with side effects
    
    Operations with side effects (in practice qemu_ld/st ops), only need to
    synchronize globals to make sure the CPU state is consistent in case of
    exception.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 840e252..c9c6423 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1407,9 +1407,8 @@ static void tcg_liveness_analysis(TCGContext *s)
                 /* if end of basic block, update */
                 if (def->flags & TCG_OPF_BB_END) {
                     tcg_la_bb_end(s, dead_temps, mem_temps);
-                } else if (def->flags & TCG_OPF_CALL_CLOBBER) {
-                    /* globals should go back to memory */
-                    memset(dead_temps, 1, s->nb_globals);
+                } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
+                    /* globals should be synced to memory */
                     memset(mem_temps, 1, s->nb_globals);
                 }
 
@@ -1670,6 +1669,23 @@ static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
     }
 }
 
+/* sync globals to their canonical location and assume they can be
+   read by the following code. 'allocated_regs' is used in case a
+   temporary registers needs to be allocated to store a constant. */
+static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
+{
+    int i;
+
+    for (i = 0; i < s->nb_globals; i++) {
+#ifdef USE_LIVENESS_ANALYSIS
+        assert(s->temps[i].val_type != TEMP_VAL_REG || s->temps[i].fixed_reg ||
+               s->temps[i].mem_coherent);
+#else
+        temp_sync(s, i, allocated_regs);
+#endif
+    }
+}
+
 /* at the end of a basic block, we assume all temporaries are dead and
    all globals are stored at their canonical location. */
 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
@@ -1907,12 +1923,11 @@ static void tcg_reg_alloc_op(TCGContext *s,
                     tcg_reg_free(s, reg);
                 }
             }
-            /* XXX: for load/store we could do that only for the slow path
-               (i.e. when a memory callback is called) */
-            
-            /* store globals and free associated registers (we assume the insn
-               can modify any global. */
-            save_globals(s, allocated_regs);
+        }
+        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
+            /* sync globals if the op has side effects and might trigger
+               an exception. */
+            sync_globals(s, allocated_regs);
         }
         
         /* satisfy the output constraints */
diff --git a/tcg/tcg.h b/tcg/tcg.h
index cd5bf00..4b44ecd 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -530,8 +530,8 @@ enum {
     TCG_OPF_BB_END       = 0x01,
     /* Instruction clobbers call registers and potentially update globals.  */
     TCG_OPF_CALL_CLOBBER = 0x02,
-    /* Instruction has side effects: it cannot be removed
-       if its outputs are not used.  */
+    /* Instruction has side effects: it cannot be removed if its outputs
+       are not used, and might trigger exceptions.  */
     TCG_OPF_SIDE_EFFECTS = 0x04,
     /* Instruction operands are 64-bits (otherwise 32-bits).  */
     TCG_OPF_64BIT        = 0x08,
commit b202d41ee705a7a6152d03eb362e2e2147505022
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:08 2012 +0200

    tcg: forbid ld/st function to modify globals
    
    Mapping a memory address using a global and accessing it through
    ld/st operations is currently broken. As it doesn't make any sense
    to do that performance wise, let's forbid that.
    
    Update the TCG documentation, and remove partial support for that.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/README b/tcg/README
index aa86992..9d1b100 100644
--- a/tcg/README
+++ b/tcg/README
@@ -349,6 +349,9 @@ st32_i64 t0, t1, offset
 write(t0, t1 + offset)
 Write 8, 16, 32 or 64 bits to host memory.
 
+All this opcodes assume that the pointed host memory doesn't correspond
+to a global. In the latter case the behaviour is unpredictable.
+
 ********* 64-bit target on 32-bit host support
 
 The following opcodes are internal to TCG.  Thus they are to be implemented by
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 2072281..9651063 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -57,9 +57,9 @@ DEF(ld8s_i32, 1, 1, 1, 0)
 DEF(ld16u_i32, 1, 1, 1, 0)
 DEF(ld16s_i32, 1, 1, 1, 0)
 DEF(ld_i32, 1, 1, 1, 0)
-DEF(st8_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
-DEF(st16_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
-DEF(st_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
+DEF(st8_i32, 0, 2, 1, 0)
+DEF(st16_i32, 0, 2, 1, 0)
+DEF(st_i32, 0, 2, 1, 0)
 /* arith */
 DEF(add_i32, 1, 2, 0, 0)
 DEF(sub_i32, 1, 2, 0, 0)
@@ -115,10 +115,10 @@ DEF(ld16s_i64, 1, 1, 1, IMPL64)
 DEF(ld32u_i64, 1, 1, 1, IMPL64)
 DEF(ld32s_i64, 1, 1, 1, IMPL64)
 DEF(ld_i64, 1, 1, 1, IMPL64)
-DEF(st8_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS | IMPL64)
-DEF(st16_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS | IMPL64)
-DEF(st32_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS | IMPL64)
-DEF(st_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS | IMPL64)
+DEF(st8_i64, 0, 2, 1, IMPL64)
+DEF(st16_i64, 0, 2, 1, IMPL64)
+DEF(st32_i64, 0, 2, 1, IMPL64)
+DEF(st_i64, 0, 2, 1, IMPL64)
 /* arith */
 DEF(add_i64, 1, 2, 0, IMPL64)
 DEF(sub_i64, 1, 2, 0, IMPL64)
commit 344028ba0f63b31c30723d2c1064d3539aa0f070
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:08 2012 +0200

    tcg: fix some op flags
    
    Some branch related ops are marked with TCG_OPF_SIDE_EFFECTS, some other
    not. In practice they don't need to, as they are all marked with
    TCG_OPF_BB_END, which is handled specifically in all the code.
    
    The call op is marked as TCG_OPF_SIDE_EFFECTS, which might be not true
    as there is are specific flags (TCG_CALL_CONST and TCG_CALL_PURE) for
    specifying that. On the other hand it always clobber arguments, so mark
    it as such even if the call op is handled in a different code path.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 04cb7ca..2072281 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -37,8 +37,8 @@ DEF(nopn, 0, 0, 1, 0) /* variable number of parameters */
 DEF(discard, 1, 0, 0, 0)
 
 DEF(set_label, 0, 0, 1, TCG_OPF_BB_END)
-DEF(call, 0, 1, 2, TCG_OPF_SIDE_EFFECTS) /* variable number of parameters */
-DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+DEF(call, 0, 1, 2, TCG_OPF_CALL_CLOBBER) /* variable number of parameters */
+DEF(br, 0, 0, 1, TCG_OPF_BB_END)
 
 #define IMPL(X) (X ? 0 : TCG_OPF_NOT_PRESENT)
 #if TCG_TARGET_REG_BITS == 32
@@ -81,12 +81,11 @@ DEF(rotl_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32))
 DEF(rotr_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32))
 DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32))
 
-DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END)
 
 DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_REG_BITS == 32))
 DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_REG_BITS == 32))
-DEF(brcond2_i32, 0, 4, 2,
-    TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS | IMPL(TCG_TARGET_REG_BITS == 32))
+DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32))
 DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_REG_BITS == 32))
 DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32))
 
@@ -141,7 +140,7 @@ DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
 DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
 DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64))
 
-DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS | IMPL64)
+DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | IMPL64)
 DEF(ext8s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8s_i64))
 DEF(ext16s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16s_i64))
 DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64))
@@ -165,8 +164,8 @@ DEF(debug_insn_start, 0, 0, 2, 0)
 #else
 DEF(debug_insn_start, 0, 0, 1, 0)
 #endif
-DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
-DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END)
+DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END)
 /* Note: even if TARGET_LONG_BITS is not defined, the INDEX_op
    constants must be defined */
 #if TCG_TARGET_REG_BITS == 32
commit 2c0366f036cbb7b2565ba8185a9daee98d709edb
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:07 2012 +0200

    tcg: don't explicitly save globals and temps
    
    The liveness analysis ensures that globals and temps are at the correct
    state at a basic block end or with an op with side effects. Avoid
    looping on all temps, this can be time consuming on targets with a lot
    of globals. Keep an assert in debug mode.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 8333667..840e252 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1648,8 +1648,14 @@ static inline void temp_sync(TCGContext *s, int temp, TCGRegSet allocated_regs)
    temporary registers needs to be allocated to store a constant. */
 static inline void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
 {
+#ifdef USE_LIVENESS_ANALYSIS
+    /* The liveness analysis already ensures that globals are back
+       in memory. Keep an assert for safety. */
+    assert(s->temps[temp].val_type == TEMP_VAL_MEM || s->temps[temp].fixed_reg);
+#else
     temp_sync(s, temp, allocated_regs);
     temp_dead(s, temp);
+#endif
 }
 
 /* save globals to their canonical location and assume they can be
@@ -1676,7 +1682,13 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
         if (ts->temp_local) {
             temp_save(s, i, allocated_regs);
         } else {
+#ifdef USE_LIVENESS_ANALYSIS
+            /* The liveness analysis already ensures that temps are dead.
+               Keep an assert for safety. */
+            assert(ts->val_type == TEMP_VAL_DEAD);
+#else
             temp_dead(s, i);
+#endif
         }
     }
 
commit 7dfd8c6aa1d0691d03c5ccc9b0c5a93bcf00f768
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:07 2012 +0200

    tcg: start with local temps in TEMP_VAL_MEM state
    
    Start with local temps in TEMP_VAL_MEM state, to make possible a later
    check that all the temps are correctly saved back to memory.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/tcg.c b/tcg/tcg.c
index d81b628..8333667 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -776,7 +776,11 @@ static void tcg_reg_alloc_start(TCGContext *s)
     }
     for(i = s->nb_globals; i < s->nb_temps; i++) {
         ts = &s->temps[i];
-        ts->val_type = TEMP_VAL_DEAD;
+        if (ts->temp_local) {
+            ts->val_type = TEMP_VAL_MEM;
+        } else {
+            ts->val_type = TEMP_VAL_DEAD;
+        }
         ts->mem_allocated = 0;
         ts->fixed_reg = 0;
     }
commit a52ad07e7cde61c799ca3351ccd58c04d6f7474f
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:07 2012 +0200

    tcg: always mark dead input arguments as dead
    
    Always mark dead input arguments as dead, even if the op is at the basic
    block end. This will allow to check that all temps are correctly saved.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/tcg.c b/tcg/tcg.c
index b219a6d..d81b628 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1874,16 +1874,16 @@ static void tcg_reg_alloc_op(TCGContext *s,
     iarg_end: ;
     }
     
+    /* mark dead temporaries and free the associated registers */
+    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+        if (IS_DEAD_ARG(i)) {
+            temp_dead(s, args[i]);
+        }
+    }
+
     if (def->flags & TCG_OPF_BB_END) {
         tcg_reg_alloc_bb_end(s, allocated_regs);
     } else {
-        /* mark dead temporaries and free the associated registers */
-        for(i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
-            if (IS_DEAD_ARG(i)) {
-                temp_dead(s, args[i]);
-            }
-        }
-        
         if (def->flags & TCG_OPF_CALL_CLOBBER) {
             /* XXX: permit generic clobber register list ? */ 
             for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
commit c29c1d7edfc57fc4ab15e30b3cea990a5685dd7a
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:07 2012 +0200

    tcg: rewrite tcg_reg_alloc_mov()
    
    Now that the liveness analysis provides more information, rewrite
    tcg_reg_alloc_mov(). This changes the behaviour about propagating
    constants and memory accesses. We now take the assumption that once
    a value is loaded into a register (from memory or from a constant),
    it's better to keep it there than to reload it later. This assumption
    is now always almost correct given that we are now sure the
    corresponding temp is going to be used later (otherwise it would have
    been synchronized and marked as dead already). The assumption is wrong
    if one of the op after clobbers some registers including the one
    of the holding the temp (this can be avoided by allocating clobbered
    registers last, which is what most TCG target do), or in case of lack
    of available register.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 6586385..b219a6d 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1714,64 +1714,80 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
                               const TCGArg *args, uint16_t dead_args,
                               uint8_t sync_args)
 {
+    TCGRegSet allocated_regs;
     TCGTemp *ts, *ots;
-    int reg;
-    const TCGArgConstraint *arg_ct;
+    const TCGArgConstraint *arg_ct, *oarg_ct;
 
+    tcg_regset_set(allocated_regs, s->reserved_regs);
     ots = &s->temps[args[0]];
     ts = &s->temps[args[1]];
-    arg_ct = &def->args_ct[0];
+    oarg_ct = &def->args_ct[0];
+    arg_ct = &def->args_ct[1];
+
+    /* If the source value is not in a register, and we're going to be
+       forced to have it in a register in order to perform the copy,
+       then copy the SOURCE value into its own register first.  That way
+       we don't have to reload SOURCE the next time it is used. */
+    if (((NEED_SYNC_ARG(0) || ots->fixed_reg) && ts->val_type != TEMP_VAL_REG)
+        || ts->val_type == TEMP_VAL_MEM) {
+        ts->reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
+        if (ts->val_type == TEMP_VAL_MEM) {
+            tcg_out_ld(s, ts->type, ts->reg, ts->mem_reg, ts->mem_offset);
+            ts->mem_coherent = 1;
+        } else if (ts->val_type == TEMP_VAL_CONST) {
+            tcg_out_movi(s, ts->type, ts->reg, ts->val);
+        }
+        s->reg_to_temp[ts->reg] = args[1];
+        ts->val_type = TEMP_VAL_REG;
+    }
 
-    /* XXX: always mark arg dead if IS_DEAD_ARG(1) */
-    if (ts->val_type == TEMP_VAL_REG) {
+    if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
+        /* mov to a non-saved dead register makes no sense (even with
+           liveness analysis disabled). */
+        assert(NEED_SYNC_ARG(0));
+        /* The code above should have moved the temp to a register. */
+        assert(ts->val_type == TEMP_VAL_REG);
+        if (!ots->mem_allocated) {
+            temp_allocate_frame(s, args[0]);
+        }
+        tcg_out_st(s, ots->type, ts->reg, ots->mem_reg, ots->mem_offset);
+        if (IS_DEAD_ARG(1)) {
+            temp_dead(s, args[1]);
+        }
+        temp_dead(s, args[0]);
+    } else if (ts->val_type == TEMP_VAL_CONST) {
+        /* propagate constant */
+        if (ots->val_type == TEMP_VAL_REG) {
+            s->reg_to_temp[ots->reg] = -1;
+        }
+        ots->val_type = TEMP_VAL_CONST;
+        ots->val = ts->val;
+    } else {
+        /* The code in the first if block should have moved the
+           temp to a register. */
+        assert(ts->val_type == TEMP_VAL_REG);
         if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
             /* the mov can be suppressed */
-            if (ots->val_type == TEMP_VAL_REG)
+            if (ots->val_type == TEMP_VAL_REG) {
                 s->reg_to_temp[ots->reg] = -1;
-            reg = ts->reg;
+            }
+            ots->reg = ts->reg;
             temp_dead(s, args[1]);
         } else {
-            if (ots->val_type == TEMP_VAL_REG) {
-                reg = ots->reg;
-            } else {
-                reg = tcg_reg_alloc(s, arg_ct->u.regs, s->reserved_regs);
-            }
-            if (ts->reg != reg) {
-                tcg_out_mov(s, ots->type, reg, ts->reg);
+            if (ots->val_type != TEMP_VAL_REG) {
+                /* When allocating a new register, make sure to not spill the
+                   input one. */
+                tcg_regset_set_reg(allocated_regs, ts->reg);
+                ots->reg = tcg_reg_alloc(s, oarg_ct->u.regs, allocated_regs);
             }
+            tcg_out_mov(s, ots->type, ots->reg, ts->reg);
         }
-    } else if (ts->val_type == TEMP_VAL_MEM) {
-        if (ots->val_type == TEMP_VAL_REG) {
-            reg = ots->reg;
-        } else {
-            reg = tcg_reg_alloc(s, arg_ct->u.regs, s->reserved_regs);
+        ots->val_type = TEMP_VAL_REG;
+        ots->mem_coherent = 0;
+        s->reg_to_temp[ots->reg] = args[0];
+        if (NEED_SYNC_ARG(0)) {
+            tcg_reg_sync(s, ots->reg);
         }
-        tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
-    } else if (ts->val_type == TEMP_VAL_CONST) {
-        if (ots->fixed_reg) {
-            reg = ots->reg;
-            tcg_out_movi(s, ots->type, reg, ts->val);
-        } else {
-            /* propagate constant */
-            if (ots->val_type == TEMP_VAL_REG)
-                s->reg_to_temp[ots->reg] = -1;
-            ots->val_type = TEMP_VAL_CONST;
-            ots->val = ts->val;
-            if (NEED_SYNC_ARG(0)) {
-                temp_sync(s, args[0], s->reserved_regs);
-            }
-            return;
-        }
-    } else {
-        tcg_abort();
-    }
-    s->reg_to_temp[reg] = args[0];
-    ots->reg = reg;
-    ots->val_type = TEMP_VAL_REG;
-    ots->mem_coherent = 0;
-
-    if (NEED_SYNC_ARG(0)) {
-        tcg_reg_sync(s, reg);
     }
 }
 
commit 4c4e1ab26bfc9b3f9b3097ecd0643bc20d0e2f0d
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:07 2012 +0200

    tcg: improve tcg_reg_alloc_movi()
    
    Now that the liveness analysis might mark some output temps as dead, call
    temp_dead() if needed.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 567ec87..6586385 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1705,6 +1705,9 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
     if (NEED_SYNC_ARG(0)) {
         temp_sync(s, args[0], s->reserved_regs);
     }
+    if (IS_DEAD_ARG(0)) {
+        temp_dead(s, args[0]);
+    }
 }
 
 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
commit 9c43b68de628a1e2cba556adfb71c17028eb802e
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:07 2012 +0200

    tcg: rework liveness analysis
    
    Rework the liveness analysis by tracking temps that need to go back to
    memory in addition to dead temps tracking. This allows to mark output
    arguments as "need sync", and to synchronize them back to memory as soon
    as they are not written anymore. This way even arguments mapping to
    globals can be marked as "dead", avoiding moves to a new register when
    input and outputs are aliased.
    
    In addition it means that registers are freed as soon as temps are not
    used anymore, instead of waiting for a basic block end or an op with side
    effects. This reduces register spilling especially on CPUs with few
    registers, and spread the mov over all the TB, increasing the
    performances on in-order CPUs.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/tcg.c b/tcg/tcg.c
index cfeeaf1..567ec87 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1180,31 +1180,27 @@ static inline void tcg_set_nop(TCGContext *s, uint16_t *opc_ptr,
     }
 }
 
-/* liveness analysis: end of function: globals are live, temps are
-   dead. */
-/* XXX: at this stage, not used as there would be little gains because
-   most TBs end with a conditional jump. */
-static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps)
+/* liveness analysis: end of function: all temps are dead, and globals
+   should be in memory. */
+static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
+                                   uint8_t *mem_temps)
 {
-    memset(dead_temps, 0, s->nb_globals);
-    memset(dead_temps + s->nb_globals, 1, s->nb_temps - s->nb_globals);
+    memset(dead_temps, 1, s->nb_temps);
+    memset(mem_temps, 1, s->nb_globals);
+    memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
 }
 
-/* liveness analysis: end of basic block: globals are live, temps are
-   dead, local temps are live. */
-static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps)
+/* liveness analysis: end of basic block: all temps are dead, globals
+   and local temps should be in memory. */
+static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
+                                 uint8_t *mem_temps)
 {
     int i;
-    TCGTemp *ts;
 
-    memset(dead_temps, 0, s->nb_globals);
-    ts = &s->temps[s->nb_globals];
+    memset(dead_temps, 1, s->nb_temps);
+    memset(mem_temps, 1, s->nb_globals);
     for(i = s->nb_globals; i < s->nb_temps; i++) {
-        if (ts->temp_local)
-            dead_temps[i] = 0;
-        else
-            dead_temps[i] = 1;
-        ts++;
+        mem_temps[i] = s->temps[i].temp_local;
     }
 }
 
@@ -1217,7 +1213,7 @@ static void tcg_liveness_analysis(TCGContext *s)
     TCGOpcode op;
     TCGArg *args;
     const TCGOpDef *def;
-    uint8_t *dead_temps;
+    uint8_t *dead_temps, *mem_temps;
     uint16_t dead_args;
     uint8_t sync_args;
     
@@ -1229,7 +1225,8 @@ static void tcg_liveness_analysis(TCGContext *s)
     s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
     
     dead_temps = tcg_malloc(s->nb_temps);
-    memset(dead_temps, 1, s->nb_temps);
+    mem_temps = tcg_malloc(s->nb_temps);
+    tcg_la_func_end(s, dead_temps, mem_temps);
 
     args = gen_opparam_ptr;
     op_index = nb_ops - 1;
@@ -1253,8 +1250,9 @@ static void tcg_liveness_analysis(TCGContext *s)
                 if (call_flags & TCG_CALL_PURE) {
                     for(i = 0; i < nb_oargs; i++) {
                         arg = args[i];
-                        if (!dead_temps[arg])
+                        if (!dead_temps[arg] || mem_temps[arg]) {
                             goto do_not_remove_call;
+                        }
                     }
                     tcg_set_nop(s, gen_opc_buf + op_index, 
                                 args - 1, nb_args);
@@ -1269,12 +1267,17 @@ static void tcg_liveness_analysis(TCGContext *s)
                         if (dead_temps[arg]) {
                             dead_args |= (1 << i);
                         }
+                        if (mem_temps[arg]) {
+                            sync_args |= (1 << i);
+                        }
                         dead_temps[arg] = 1;
+                        mem_temps[arg] = 0;
                     }
                     
                     if (!(call_flags & TCG_CALL_CONST)) {
-                        /* globals are live (they may be used by the call) */
-                        memset(dead_temps, 0, s->nb_globals);
+                        /* globals should go back to memory */
+                        memset(dead_temps, 1, s->nb_globals);
+                        memset(mem_temps, 1, s->nb_globals);
                     }
 
                     /* input args are live */
@@ -1304,6 +1307,7 @@ static void tcg_liveness_analysis(TCGContext *s)
             args--;
             /* mark the temporary as dead */
             dead_temps[args[0]] = 1;
+            mem_temps[args[0]] = 0;
             break;
         case INDEX_op_end:
             break;
@@ -1369,8 +1373,9 @@ static void tcg_liveness_analysis(TCGContext *s)
             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                 for(i = 0; i < nb_oargs; i++) {
                     arg = args[i];
-                    if (!dead_temps[arg])
+                    if (!dead_temps[arg] || mem_temps[arg]) {
                         goto do_not_remove;
+                    }
                 }
             do_remove:
                 tcg_set_nop(s, gen_opc_buf + op_index, args, def->nb_args);
@@ -1388,15 +1393,20 @@ static void tcg_liveness_analysis(TCGContext *s)
                     if (dead_temps[arg]) {
                         dead_args |= (1 << i);
                     }
+                    if (mem_temps[arg]) {
+                        sync_args |= (1 << i);
+                    }
                     dead_temps[arg] = 1;
+                    mem_temps[arg] = 0;
                 }
 
                 /* if end of basic block, update */
                 if (def->flags & TCG_OPF_BB_END) {
-                    tcg_la_bb_end(s, dead_temps);
+                    tcg_la_bb_end(s, dead_temps, mem_temps);
                 } else if (def->flags & TCG_OPF_CALL_CLOBBER) {
-                    /* globals are live */
-                    memset(dead_temps, 0, s->nb_globals);
+                    /* globals should go back to memory */
+                    memset(dead_temps, 1, s->nb_globals);
+                    memset(mem_temps, 1, s->nb_globals);
                 }
 
                 /* input args are live */
commit ec7a869d31a1f82fe5b7750d8952c26753ea1df8
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:07 2012 +0200

    tcg: sync output arguments on liveness request
    
    Synchronize an output argument when requested by the liveness analysis.
    This is needed so that the temp can be declared dead later.
    
    For that, add a new op_sync_args table in which each bit tells if the
    corresponding output argument needs to be synchronized with the memory.
    Pass it to the tcg_reg_alloc_* functions, and honor this bit. We need to
    synchronize the argument before marking it as dead, and we have to make
    sure all the infos about the temp are correctly filled.
    
    At the same time change some types from unsigned int to uint16_t when
    passing op_dead_args.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/tcg.c b/tcg/tcg.c
index fabf3cf..cfeeaf1 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1218,13 +1218,15 @@ static void tcg_liveness_analysis(TCGContext *s)
     TCGArg *args;
     const TCGOpDef *def;
     uint8_t *dead_temps;
-    unsigned int dead_args;
+    uint16_t dead_args;
+    uint8_t sync_args;
     
     gen_opc_ptr++; /* skip end */
 
     nb_ops = gen_opc_ptr - gen_opc_buf;
 
     s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
+    s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
     
     dead_temps = tcg_malloc(s->nb_temps);
     memset(dead_temps, 1, s->nb_temps);
@@ -1261,6 +1263,7 @@ static void tcg_liveness_analysis(TCGContext *s)
 
                     /* output args are dead */
                     dead_args = 0;
+                    sync_args = 0;
                     for(i = 0; i < nb_oargs; i++) {
                         arg = args[i];
                         if (dead_temps[arg]) {
@@ -1285,6 +1288,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                         }
                     }
                     s->op_dead_args[op_index] = dead_args;
+                    s->op_sync_args[op_index] = sync_args;
                 }
                 args--;
             }
@@ -1378,6 +1382,7 @@ static void tcg_liveness_analysis(TCGContext *s)
 
                 /* output args are dead */
                 dead_args = 0;
+                sync_args = 0;
                 for(i = 0; i < nb_oargs; i++) {
                     arg = args[i];
                     if (dead_temps[arg]) {
@@ -1403,6 +1408,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                     dead_temps[arg] = 0;
                 }
                 s->op_dead_args[op_index] = dead_args;
+                s->op_sync_args[op_index] = sync_args;
             }
             break;
         }
@@ -1421,6 +1427,8 @@ static void tcg_liveness_analysis(TCGContext *s)
 
     s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
     memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
+    s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
+    memset(s->op_sync_args, 0, nb_ops * sizeof(uint8_t));
 }
 #endif
 
@@ -1662,8 +1670,10 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
 }
 
 #define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
+#define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
 
-static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args)
+static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
+                               uint16_t dead_args, uint8_t sync_args)
 {
     TCGTemp *ots;
     tcg_target_ulong val;
@@ -1682,11 +1692,14 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args)
         ots->val_type = TEMP_VAL_CONST;
         ots->val = val;
     }
+    if (NEED_SYNC_ARG(0)) {
+        temp_sync(s, args[0], s->reserved_regs);
+    }
 }
 
 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
-                              const TCGArg *args,
-                              unsigned int dead_args)
+                              const TCGArg *args, uint16_t dead_args,
+                              uint8_t sync_args)
 {
     TCGTemp *ts, *ots;
     int reg;
@@ -1731,6 +1744,9 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
                 s->reg_to_temp[ots->reg] = -1;
             ots->val_type = TEMP_VAL_CONST;
             ots->val = ts->val;
+            if (NEED_SYNC_ARG(0)) {
+                temp_sync(s, args[0], s->reserved_regs);
+            }
             return;
         }
     } else {
@@ -1740,12 +1756,16 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
     ots->reg = reg;
     ots->val_type = TEMP_VAL_REG;
     ots->mem_coherent = 0;
+
+    if (NEED_SYNC_ARG(0)) {
+        tcg_reg_sync(s, reg);
+    }
 }
 
 static void tcg_reg_alloc_op(TCGContext *s, 
                              const TCGOpDef *def, TCGOpcode opc,
-                             const TCGArg *args,
-                             unsigned int dead_args)
+                             const TCGArg *args, uint16_t dead_args,
+                             uint8_t sync_args)
 {
     TCGRegSet allocated_regs;
     int i, k, nb_iargs, nb_oargs, reg;
@@ -1871,19 +1891,15 @@ static void tcg_reg_alloc_op(TCGContext *s,
             tcg_regset_set_reg(allocated_regs, reg);
             /* if a fixed register is used, then a move will be done afterwards */
             if (!ts->fixed_reg) {
-                if (IS_DEAD_ARG(i)) {
-                    temp_dead(s, args[i]);
-                } else {
-                    if (ts->val_type == TEMP_VAL_REG) {
-                        s->reg_to_temp[ts->reg] = -1;
-                    }
-                    ts->val_type = TEMP_VAL_REG;
-                    ts->reg = reg;
-                    /* temp value is modified, so the value kept in memory is
-                       potentially not the same */
-                    ts->mem_coherent = 0;
-                    s->reg_to_temp[reg] = arg;
-               }
+                if (ts->val_type == TEMP_VAL_REG) {
+                    s->reg_to_temp[ts->reg] = -1;
+                }
+                ts->val_type = TEMP_VAL_REG;
+                ts->reg = reg;
+                /* temp value is modified, so the value kept in memory is
+                   potentially not the same */
+                ts->mem_coherent = 0;
+                s->reg_to_temp[reg] = arg;
             }
         oarg_end:
             new_args[i] = reg;
@@ -1900,6 +1916,12 @@ static void tcg_reg_alloc_op(TCGContext *s,
         if (ts->fixed_reg && ts->reg != reg) {
             tcg_out_mov(s, ts->type, ts->reg, reg);
         }
+        if (NEED_SYNC_ARG(i)) {
+            tcg_reg_sync(s, reg);
+        }
+        if (IS_DEAD_ARG(i)) {
+            temp_dead(s, args[i]);
+        }
     }
 }
 
@@ -1911,7 +1933,7 @@ static void tcg_reg_alloc_op(TCGContext *s,
 
 static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
                               TCGOpcode opc, const TCGArg *args,
-                              unsigned int dead_args)
+                              uint16_t dead_args, uint8_t sync_args)
 {
     int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params;
     TCGArg arg, func_arg;
@@ -2066,16 +2088,18 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
                 tcg_out_mov(s, ts->type, ts->reg, reg);
             }
         } else {
+            if (ts->val_type == TEMP_VAL_REG) {
+                s->reg_to_temp[ts->reg] = -1;
+            }
+            ts->val_type = TEMP_VAL_REG;
+            ts->reg = reg;
+            ts->mem_coherent = 0;
+            s->reg_to_temp[reg] = arg;
+            if (NEED_SYNC_ARG(i)) {
+                tcg_reg_sync(s, reg);
+            }
             if (IS_DEAD_ARG(i)) {
                 temp_dead(s, args[i]);
-            } else {
-                if (ts->val_type == TEMP_VAL_REG) {
-                    s->reg_to_temp[ts->reg] = -1;
-                }
-                ts->val_type = TEMP_VAL_REG;
-                ts->reg = reg;
-                ts->mem_coherent = 0;
-                s->reg_to_temp[reg] = arg;
             }
         }
     }
@@ -2106,7 +2130,6 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
     TCGOpcode opc;
     int op_index;
     const TCGOpDef *def;
-    unsigned int dead_args;
     const TCGArg *args;
 
 #ifdef DEBUG_DISAS
@@ -2167,12 +2190,13 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
         switch(opc) {
         case INDEX_op_mov_i32:
         case INDEX_op_mov_i64:
-            dead_args = s->op_dead_args[op_index];
-            tcg_reg_alloc_mov(s, def, args, dead_args);
+            tcg_reg_alloc_mov(s, def, args, s->op_dead_args[op_index],
+                              s->op_sync_args[op_index]);
             break;
         case INDEX_op_movi_i32:
         case INDEX_op_movi_i64:
-            tcg_reg_alloc_movi(s, args);
+            tcg_reg_alloc_movi(s, args, s->op_dead_args[op_index],
+                               s->op_sync_args[op_index]);
             break;
         case INDEX_op_debug_insn_start:
             /* debug instruction */
@@ -2193,8 +2217,9 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
             tcg_out_label(s, args[0], s->code_ptr);
             break;
         case INDEX_op_call:
-            dead_args = s->op_dead_args[op_index];
-            args += tcg_reg_alloc_call(s, def, opc, args, dead_args);
+            args += tcg_reg_alloc_call(s, def, opc, args,
+                                       s->op_dead_args[op_index],
+                                       s->op_sync_args[op_index]);
             goto next;
         case INDEX_op_end:
             goto the_end;
@@ -2206,8 +2231,8 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
             /* Note: in order to speed up the code, it would be much
                faster to have specialized register allocator functions for
                some common argument patterns */
-            dead_args = s->op_dead_args[op_index];
-            tcg_reg_alloc_op(s, def, opc, args, dead_args);
+            tcg_reg_alloc_op(s, def, opc, args, s->op_dead_args[op_index],
+                             s->op_sync_args[op_index]);
             break;
         }
         args += def->nb_args;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 45e94f5..cd5bf00 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -381,6 +381,9 @@ struct TCGContext {
     /* liveness analysis */
     uint16_t *op_dead_args; /* for each operation, each bit tells if the
                                corresponding argument is dead */
+    uint8_t *op_sync_args;  /* for each operation, each bit tells if the
+                               corresponding output argument needs to be
+                               sync to memory. */
     
     /* tells in which temporary a given register is. It does not take
        into account fixed registers */
commit 1ad80729bea3d8ed1b67b457af44cbf9bbd8c3ec
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:06 2012 +0200

    tcg: add temp_sync()
    
    Add a new function temp_sync() to synchronize the canonical location
    of a temp with the value in the corresponding register, but without
    freeing the associated register. Rewrite temp_save() to call
    temp_sync() followed by temp_dead().
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 7141ebb..fabf3cf 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1593,31 +1593,27 @@ static inline void temp_dead(TCGContext *s, int temp)
     }
 }
 
-/* save a temporary to memory. 'allocated_regs' is used in case a
+/* sync a temporary to memory. 'allocated_regs' is used in case a
    temporary registers needs to be allocated to store a constant. */
-static void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
+static inline void temp_sync(TCGContext *s, int temp, TCGRegSet allocated_regs)
 {
     TCGTemp *ts;
-    int reg;
 
     ts = &s->temps[temp];
     if (!ts->fixed_reg) {
         switch(ts->val_type) {
+        case TEMP_VAL_CONST:
+            ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
+                                    allocated_regs);
+            ts->val_type = TEMP_VAL_REG;
+            s->reg_to_temp[ts->reg] = temp;
+            ts->mem_coherent = 0;
+            tcg_out_movi(s, ts->type, ts->reg, ts->val);
+            /* fallthrough*/
         case TEMP_VAL_REG:
-            tcg_reg_free(s, ts->reg);
+            tcg_reg_sync(s, ts->reg);
             break;
         case TEMP_VAL_DEAD:
-            ts->val_type = TEMP_VAL_MEM;
-            break;
-        case TEMP_VAL_CONST:
-            reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], 
-                                allocated_regs);
-            if (!ts->mem_allocated) 
-                temp_allocate_frame(s, temp);
-            tcg_out_movi(s, ts->type, reg, ts->val);
-            tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
-            ts->val_type = TEMP_VAL_MEM;
-            break;
         case TEMP_VAL_MEM:
             break;
         default:
@@ -1626,6 +1622,14 @@ static void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
     }
 }
 
+/* save a temporary to memory. 'allocated_regs' is used in case a
+   temporary registers needs to be allocated to store a constant. */
+static inline void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
+{
+    temp_sync(s, temp, allocated_regs);
+    temp_dead(s, temp);
+}
+
 /* save globals to their canonical location and assume they can be
    modified be the following code. 'allocated_regs' is used in case a
    temporary registers needs to be allocated to store a constant. */
commit 7f6ceedf9c5da96ee2b1fe42573b781243bc2828
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:06 2012 +0200

    tcg: add tcg_reg_sync()
    
    Add a new function tcg_reg_sync() to synchronize the canonical location
    of a temp with the value in the associated register, but without freeing
    it. Rewrite tcg_reg_free() to first call tcg_reg_sync() and then to free
    the register.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 2f84287..7141ebb 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1517,22 +1517,33 @@ static void temp_allocate_frame(TCGContext *s, int temp)
     s->current_frame_offset += (tcg_target_long)sizeof(tcg_target_long);
 }
 
+/* sync register 'reg' by saving it to the corresponding temporary */
+static inline void tcg_reg_sync(TCGContext *s, int reg)
+{
+    TCGTemp *ts;
+    int temp;
+
+    temp = s->reg_to_temp[reg];
+    ts = &s->temps[temp];
+    assert(ts->val_type == TEMP_VAL_REG);
+    if (!ts->mem_coherent && !ts->fixed_reg) {
+        if (!ts->mem_allocated) {
+            temp_allocate_frame(s, temp);
+        }
+        tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
+    }
+    ts->mem_coherent = 1;
+}
+
 /* free register 'reg' by spilling the corresponding temporary if necessary */
 static void tcg_reg_free(TCGContext *s, int reg)
 {
-    TCGTemp *ts;
     int temp;
 
     temp = s->reg_to_temp[reg];
     if (temp != -1) {
-        ts = &s->temps[temp];
-        assert(ts->val_type == TEMP_VAL_REG);
-        if (!ts->mem_coherent) {
-            if (!ts->mem_allocated) 
-                temp_allocate_frame(s, temp);
-            tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
-        }
-        ts->val_type = TEMP_VAL_MEM;
+        tcg_reg_sync(s, reg);
+        s->temps[temp].val_type = TEMP_VAL_MEM;
         s->reg_to_temp[reg] = -1;
     }
 }
commit 639368dd685f89b0567b56304c62937ad0ef8248
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Tue Oct 9 21:53:06 2012 +0200

    tcg: add temp_dead()
    
    A lot of code is duplicated to mark a temporary as dead. Replace it
    by temp_dead(), which in addition marks the temp as saved in memory
    for globals and local temps, instead of doing this a posteriori in
    temp_save().
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 5faaca5..2f84287 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1564,6 +1564,24 @@ static int tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2)
     tcg_abort();
 }
 
+/* mark a temporary as dead. */
+static inline void temp_dead(TCGContext *s, int temp)
+{
+    TCGTemp *ts;
+
+    ts = &s->temps[temp];
+    if (!ts->fixed_reg) {
+        if (ts->val_type == TEMP_VAL_REG) {
+            s->reg_to_temp[ts->reg] = -1;
+        }
+        if (temp < s->nb_globals || (ts->temp_local && ts->mem_allocated)) {
+            ts->val_type = TEMP_VAL_MEM;
+        } else {
+            ts->val_type = TEMP_VAL_DEAD;
+        }
+    }
+}
+
 /* save a temporary to memory. 'allocated_regs' is used in case a
    temporary registers needs to be allocated to store a constant. */
 static void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
@@ -1621,10 +1639,7 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
         if (ts->temp_local) {
             temp_save(s, i, allocated_regs);
         } else {
-            if (ts->val_type == TEMP_VAL_REG) {
-                s->reg_to_temp[ts->reg] = -1;
-            }
-            ts->val_type = TEMP_VAL_DEAD;
+            temp_dead(s, i);
         }
     }
 
@@ -1673,8 +1688,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
             if (ots->val_type == TEMP_VAL_REG)
                 s->reg_to_temp[ots->reg] = -1;
             reg = ts->reg;
-            s->reg_to_temp[reg] = -1;
-            ts->val_type = TEMP_VAL_DEAD;
+            temp_dead(s, args[1]);
         } else {
             if (ots->val_type == TEMP_VAL_REG) {
                 reg = ots->reg;
@@ -1801,14 +1815,8 @@ static void tcg_reg_alloc_op(TCGContext *s,
     } else {
         /* mark dead temporaries and free the associated registers */
         for(i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
-            arg = args[i];
             if (IS_DEAD_ARG(i)) {
-                ts = &s->temps[arg];
-                if (!ts->fixed_reg) {
-                    if (ts->val_type == TEMP_VAL_REG)
-                        s->reg_to_temp[ts->reg] = -1;
-                    ts->val_type = TEMP_VAL_DEAD;
-                }
+                temp_dead(s, args[i]);
             }
         }
         
@@ -1848,11 +1856,12 @@ static void tcg_reg_alloc_op(TCGContext *s,
             tcg_regset_set_reg(allocated_regs, reg);
             /* if a fixed register is used, then a move will be done afterwards */
             if (!ts->fixed_reg) {
-                if (ts->val_type == TEMP_VAL_REG)
-                    s->reg_to_temp[ts->reg] = -1;
                 if (IS_DEAD_ARG(i)) {
-                    ts->val_type = TEMP_VAL_DEAD;
+                    temp_dead(s, args[i]);
                 } else {
+                    if (ts->val_type == TEMP_VAL_REG) {
+                        s->reg_to_temp[ts->reg] = -1;
+                    }
                     ts->val_type = TEMP_VAL_REG;
                     ts->reg = reg;
                     /* temp value is modified, so the value kept in memory is
@@ -2011,14 +2020,8 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
     
     /* mark dead temporaries and free the associated registers */
     for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
-        arg = args[i];
         if (IS_DEAD_ARG(i)) {
-            ts = &s->temps[arg];
-            if (!ts->fixed_reg) {
-                if (ts->val_type == TEMP_VAL_REG)
-                    s->reg_to_temp[ts->reg] = -1;
-                ts->val_type = TEMP_VAL_DEAD;
-            }
+            temp_dead(s, args[i]);
         }
     }
     
@@ -2048,11 +2051,12 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
                 tcg_out_mov(s, ts->type, ts->reg, reg);
             }
         } else {
-            if (ts->val_type == TEMP_VAL_REG)
-                s->reg_to_temp[ts->reg] = -1;
             if (IS_DEAD_ARG(i)) {
-                ts->val_type = TEMP_VAL_DEAD;
+                temp_dead(s, args[i]);
             } else {
+                if (ts->val_type == TEMP_VAL_REG) {
+                    s->reg_to_temp[ts->reg] = -1;
+                }
                 ts->val_type = TEMP_VAL_REG;
                 ts->reg = reg;
                 ts->mem_coherent = 0;
@@ -2167,16 +2171,7 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
             args += args[0];
             goto next;
         case INDEX_op_discard:
-            {
-                TCGTemp *ts;
-                ts = &s->temps[args[0]];
-                /* mark the temporary as dead */
-                if (!ts->fixed_reg) {
-                    if (ts->val_type == TEMP_VAL_REG)
-                        s->reg_to_temp[ts->reg] = -1;
-                    ts->val_type = TEMP_VAL_DEAD;
-                }
-            }
+            temp_dead(s, args[0]);
             break;
         case INDEX_op_set_label:
             tcg_reg_alloc_bb_end(s, s->reserved_regs);
commit 17b914912d6175a5405d472832d6eba6187b6beb
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Oct 20 17:31:44 2012 +0200

    tcg/i386: remove ld/st third argument register constraint
    
    On x86_64, remove the constraint on the third argument register which
    is not needed:
     - For loads the helper arguments are env, addr, mem_idx. The addr
       value should not be in the two first argument registers as they are
       used in tcg_out_tlb_load().
     - For stores the helper arguments are env, addr, data, mem_idx.
       The addr and data values should not be in the two first argument
       registers as they are used in tcg_out_tlb_load(). The data value
       should also not be in the two first argument registers, but could
       be in the third argument register in which case it would be already
       loaded at the right location.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 4c59e33..9c8f69a 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -92,7 +92,6 @@ static const int tcg_target_call_oarg_regs[] = {
 #if TCG_TARGET_REG_BITS == 64
 # define TCG_REG_L0 tcg_target_call_iarg_regs[0]
 # define TCG_REG_L1 tcg_target_call_iarg_regs[1]
-# define TCG_REG_L2 tcg_target_call_iarg_regs[2]
 #else
 # define TCG_REG_L0 TCG_REG_EAX
 # define TCG_REG_L1 TCG_REG_EDX
@@ -181,14 +180,11 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         ct->ct |= TCG_CT_REG;
 #if TCG_TARGET_REG_BITS == 64
             tcg_regset_set32(ct->u.regs, 0, 0xffff);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L2);
 #else
             tcg_regset_set32(ct->u.regs, 0, 0xff);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
 #endif
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
+        tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
         break;
 
     case 'e':
commit 166792f7bbe2cad120b75062ff6f8f1b67366f18
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Sat Oct 20 17:31:44 2012 +0200

    tcg/i386: remove suboptimal register shifting
    
    Now that CONFIG_TCG_PASS_AREG0 has been removed, it's easier to get
    an optimal code for the load/store functions.
    
    First swap the two registers used in tcg_out_tlb_load() so that the
    address end-up in the second register instead of the first one. Adjust
    tcg_out_qemu_ld() and tcg_out_qemu_st() to respectively call
    tcg_out_qemu_ld_direct() and tcg_out_qemu_st_direct() with the correct
    registers. Then replace the register shifting by direct load of the
    arguments.
    
    Reviewed-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 4952c05..4c59e33 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1016,12 +1016,12 @@ static const void *qemu_st_helpers[4] = {
    LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
    positions of the displacements of forward jumps to the TLB miss case.
 
-   First argument register is loaded with the low part of the address.
+   Second argument register is loaded with the low part of the address.
    In the TLB hit case, it has been adjusted as indicated by the TLB
    and so is a host address.  In the TLB miss case, it continues to
    hold a guest address.
 
-   Second argument register is clobbered.  */
+   First argument register is clobbered.  */
 
 static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
                                     int mem_index, int s_bits,
@@ -1039,25 +1039,25 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
         rexw = P_REXW;
     }
 
-    tcg_out_mov(s, type, r1, addrlo);
     tcg_out_mov(s, type, r0, addrlo);
+    tcg_out_mov(s, type, r1, addrlo);
 
-    tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
+    tcg_out_shifti(s, SHIFT_SHR + rexw, r0,
                    TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 
-    tgen_arithi(s, ARITH_AND + rexw, r0,
-                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
     tgen_arithi(s, ARITH_AND + rexw, r1,
+                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
+    tgen_arithi(s, ARITH_AND + rexw, r0,
                 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
 
-    tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
+    tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r0, TCG_AREG0, r0, 0,
                              offsetof(CPUArchState, tlb_table[mem_index][0])
                              + which);
 
-    /* cmp 0(r1), r0 */
-    tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);
+    /* cmp 0(r0), r1 */
+    tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r1, r0, 0);
 
-    tcg_out_mov(s, type, r0, addrlo);
+    tcg_out_mov(s, type, r1, addrlo);
 
     /* jne label1 */
     tcg_out8(s, OPC_JCC_short + JCC_JNE);
@@ -1065,8 +1065,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
     s->code_ptr++;
 
     if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
-        /* cmp 4(r1), addrhi */
-        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);
+        /* cmp 4(r0), addrhi */
+        tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r0, 4);
 
         /* jne label1 */
         tcg_out8(s, OPC_JCC_short + JCC_JNE);
@@ -1076,8 +1076,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
 
     /* TLB Hit.  */
 
-    /* add addend(r1), r0 */
-    tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
+    /* add addend(r0), r1 */
+    tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r1, r0,
                          offsetof(CPUTLBEntry, addend) - which);
 }
 #endif
@@ -1169,9 +1169,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     int addrlo_idx;
 #if defined(CONFIG_SOFTMMU)
     int mem_index, s_bits;
-#if TCG_TARGET_REG_BITS == 64
-    int arg_idx;
-#else
+#if TCG_TARGET_REG_BITS == 32
     int stack_adjust;
 #endif
     uint8_t *label_ptr[3];
@@ -1192,7 +1190,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                      label_ptr, offsetof(CPUTLBEntry, addr_read));
 
     /* TLB Hit.  */
-    tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
+    tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, opc);
 
     /* jmp label2 */
     tcg_out8(s, OPC_JMP_short);
@@ -1220,15 +1218,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     tcg_out_push(s, TCG_AREG0);
     stack_adjust += 4;
 #else
-    /* The first argument is already loaded with addrlo.  */
-    arg_idx = 1;
-    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
-                 mem_index);
-    /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0);
     tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
+    /* The second argument is already loaded with addrlo.  */
+    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
 #endif
 
     tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
@@ -1294,9 +1286,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                use the ADDR32 prefix.  For now, do nothing.  */
 
             if (offset != GUEST_BASE) {
-                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
-                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
-                base = TCG_REG_L0;
+                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
+                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
+                base = TCG_REG_L1;
                 offset = 0;
             }
         }
@@ -1317,8 +1309,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
     /* ??? Ideally we wouldn't need a scratch register.  For user-only,
        we could perform the bswap twice to restore the original value
        instead of moving to the scratch.  But as it is, the L constraint
-       means that TCG_REG_L1 is definitely free here.  */
-    const int scratch = TCG_REG_L1;
+       means that TCG_REG_L0 is definitely free here.  */
+    const int scratch = TCG_REG_L0;
 
     switch (sizeop) {
     case 0:
@@ -1391,7 +1383,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                      label_ptr, offsetof(CPUTLBEntry, addr_write));
 
     /* TLB Hit.  */
-    tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
+    tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, opc);
 
     /* jmp label2 */
     tcg_out8(s, OPC_JMP_short);
@@ -1425,15 +1417,12 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     tcg_out_push(s, TCG_AREG0);
     stack_adjust += 4;
 #else
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
+    /* The second argument is already loaded with addrlo.  */
     tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
-                TCG_REG_L1, data_reg);
-    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_L2, mem_index);
+                tcg_target_call_iarg_regs[2], data_reg);
+    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], mem_index);
     stack_adjust = 0;
-    /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
 #endif
 
     tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
@@ -1460,9 +1449,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                use the ADDR32 prefix.  For now, do nothing.  */
 
             if (offset != GUEST_BASE) {
-                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
-                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
-                base = TCG_REG_L0;
+                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
+                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
+                base = TCG_REG_L1;
                 offset = 0;
             }
         }
commit 50cd72148211c5e5f22ea2519d19ce024226e61f
Author: Max Filippov <jcmvbkbc at gmail.com>
Date:   Thu Oct 25 15:37:52 2012 +0400

    hw/xtensa_sim: get rid of intermediate xtensa_sim_init
    
    Remove xtensa_sim_init that only explodes machine init args, rename
    sim_init to xtensa_sim_init.
    
    Signed-off-by: Max Filippov <jcmvbkbc at gmail.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/hw/xtensa_sim.c b/hw/xtensa_sim.c
index 2e846d8..0d633e4 100644
--- a/hw/xtensa_sim.c
+++ b/hw/xtensa_sim.c
@@ -44,16 +44,20 @@ static void sim_reset(void *opaque)
     cpu_reset(CPU(cpu));
 }
 
-static void sim_init(ram_addr_t ram_size,
-        const char *boot_device,
-        const char *kernel_filename, const char *kernel_cmdline,
-        const char *initrd_filename, const char *cpu_model)
+static void xtensa_sim_init(QEMUMachineInitArgs *args)
 {
     XtensaCPU *cpu = NULL;
     CPUXtensaState *env = NULL;
     MemoryRegion *ram, *rom;
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
     int n;
 
+    if (!cpu_model) {
+        cpu_model = XTENSA_DEFAULT_CPU_MODEL;
+    }
+
     for (n = 0; n < smp_cpus; n++) {
         cpu = cpu_xtensa_init(cpu_model);
         if (cpu == NULL) {
@@ -96,21 +100,6 @@ static void sim_init(ram_addr_t ram_size,
     }
 }
 
-static void xtensa_sim_init(QEMUMachineInitArgs *args)
-{
-    ram_addr_t ram_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_device = args->boot_device;
-    if (!cpu_model) {
-        cpu_model = XTENSA_DEFAULT_CPU_MODEL;
-    }
-    sim_init(ram_size, boot_device, kernel_filename, kernel_cmdline,
-            initrd_filename, cpu_model);
-}
-
 static QEMUMachine xtensa_sim_machine = {
     .name = "sim",
     .desc = "sim machine (" XTENSA_DEFAULT_CPU_MODEL ")",
commit d64ed08eeca9587d28d573d81692721cea3273ca
Author: Max Filippov <jcmvbkbc at gmail.com>
Date:   Thu Oct 25 15:37:51 2012 +0400

    hw/xtensa_lx60: don't prematurely explode QEMUMachineInitArgs
    
    Don't explode QEMUMachineInitArgs before passing it to lx_init.
    
    Signed-off-by: Max Filippov <jcmvbkbc at gmail.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/hw/xtensa_lx60.c b/hw/xtensa_lx60.c
index 5dd2e08..4c42edc 100644
--- a/hw/xtensa_lx60.c
+++ b/hw/xtensa_lx60.c
@@ -155,10 +155,7 @@ static void lx60_reset(void *opaque)
     cpu_reset(CPU(cpu));
 }
 
-static void lx_init(const LxBoardDesc *board,
-        ram_addr_t ram_size, const char *boot_device,
-        const char *kernel_filename, const char *kernel_cmdline,
-        const char *initrd_filename, const char *cpu_model)
+static void lx_init(const LxBoardDesc *board, QEMUMachineInitArgs *args)
 {
 #ifdef TARGET_WORDS_BIGENDIAN
     int be = 1;
@@ -171,6 +168,9 @@ static void lx_init(const LxBoardDesc *board,
     MemoryRegion *ram, *rom, *system_io;
     DriveInfo *dinfo;
     pflash_t *flash = NULL;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
     int n;
 
     if (!cpu_model) {
@@ -194,7 +194,7 @@ static void lx_init(const LxBoardDesc *board,
     }
 
     ram = g_malloc(sizeof(*ram));
-    memory_region_init_ram(ram, "lx60.dram", ram_size);
+    memory_region_init_ram(ram, "lx60.dram", args->ram_size);
     vmstate_register_ram_global(ram);
     memory_region_add_subregion(system_memory, 0, ram);
 
@@ -271,38 +271,22 @@ static void lx_init(const LxBoardDesc *board,
 
 static void xtensa_lx60_init(QEMUMachineInitArgs *args)
 {
-    ram_addr_t ram_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_device = args->boot_device;
     static const LxBoardDesc lx60_board = {
         .flash_size = 0x400000,
         .flash_sector_size = 0x10000,
         .sram_size = 0x20000,
     };
-    lx_init(&lx60_board, ram_size, boot_device,
-            kernel_filename, kernel_cmdline,
-            initrd_filename, cpu_model);
+    lx_init(&lx60_board, args);
 }
 
 static void xtensa_lx200_init(QEMUMachineInitArgs *args)
 {
-    ram_addr_t ram_size = args->ram_size;
-    const char *cpu_model = args->cpu_model;
-    const char *kernel_filename = args->kernel_filename;
-    const char *kernel_cmdline = args->kernel_cmdline;
-    const char *initrd_filename = args->initrd_filename;
-    const char *boot_device = args->boot_device;
     static const LxBoardDesc lx200_board = {
         .flash_size = 0x1000000,
         .flash_sector_size = 0x20000,
         .sram_size = 0x2000000,
     };
-    lx_init(&lx200_board, ram_size, boot_device,
-            kernel_filename, kernel_cmdline,
-            initrd_filename, cpu_model);
+    lx_init(&lx200_board, args);
 }
 
 static QEMUMachine xtensa_lx60_machine = {
commit d1bd2423a90a144576cd140dc84b1fe8c7c8fda0
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Fri Oct 19 14:54:23 2012 +0100

    Makefile: Forbid out-of-tree build from a source tree that has been built in
    
    If we try to do an out-of-tree build but the source tree we're building from
    has been used in the past for an in-tree build then things will go
    confusingly wrong. Specifically, some parts of the build process will pull
    in generated files from the old in-tree build (because SRC_PATH is on
    the vpath). Diagnose this situation so we can produce a useful error
    message and tell the user how to fix it (run distclean in the source tree).
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/Makefile b/Makefile
index 17e2d58..b522b10 100644
--- a/Makefile
+++ b/Makefile
@@ -8,6 +8,17 @@ ifneq ($(wildcard config-host.mak),)
 # Put the all: rule here so that config-host.mak can contain dependencies.
 all:
 include config-host.mak
+
+# Check that we're not trying to do an out-of-tree build from
+# a tree that's been used for an in-tree build.
+ifneq ($(realpath $(SRC_PATH)),$(realpath .))
+ifneq ($(wildcard $(SRC_PATH)/config-host.mak),)
+$(error This is an out of tree build but your source tree ($(SRC_PATH)) \
+seems to have been used for an in-tree build. You can fix this by running \
+"make distclean && rm -rf *-linux-user *-softmmu" in your source tree)
+endif
+endif
+
 include $(SRC_PATH)/rules.mak
 config-host.mak: $(SRC_PATH)/configure
 	@echo $@ is out-of-date, running configure
commit 49cdaea18b6a827fbe9555efef12821909e59229
Author: Catalin Patulea <catalinp at google.com>
Date:   Mon Oct 22 19:18:35 2012 -0400

    tests/tcg: fix a few warnings
    
    Signed-off-by: Catalin Patulea <catalinp at google.com>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/tests/tcg/hello-i386.c b/tests/tcg/hello-i386.c
index 86afc34..fa00380 100644
--- a/tests/tcg/hello-i386.c
+++ b/tests/tcg/hello-i386.c
@@ -1,6 +1,6 @@
 #include <asm/unistd.h>
 
-static inline volatile void exit(int status)
+static inline void exit(int status)
 {
   int __res;
   __asm__ volatile ("movl %%ecx,%%ebx\n"\
@@ -17,6 +17,7 @@ static inline int write(int fd, const char * buf, int len)
 		    "popl %%ebx\n"\
 		    : "=a" (status) \
 		    : "0" (__NR_write),"S" ((long)(fd)),"c" ((long)(buf)),"d" ((long)(len)));
+  return status;
 }
 
 void _start(void)
diff --git a/tests/tcg/test-i386.c b/tests/tcg/test-i386.c
index 64d929e..40392ac 100644
--- a/tests/tcg/test-i386.c
+++ b/tests/tcg/test-i386.c
@@ -785,7 +785,7 @@ void fpu_clear_exceptions(void)
         long double fpregs[8];
     } float_env32;
 
-    asm volatile ("fnstenv %0\n" : : "m" (float_env32));
+    asm volatile ("fnstenv %0\n" : "=m" (float_env32));
     float_env32.fpus &= ~0x7f;
     asm volatile ("fldenv %0\n" : : "m" (float_env32));
 }
commit 0356404b0f1da939657cad1efeb556745cd430d5
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Mon Oct 22 00:50:58 2012 +0200

    target-sparc64: disable VGA cirrus
    
    OpenBIOS on sparc64 only support Standard VGA and not Cirrus VGA. Don't
    build Cirrus VGA support so that it can't be selected.
    
    This fixes the breakage introduced by commit f2898771.
    
    Reported-by: Richard Henderson <rth at twiddle.net>
    Cc: Blue Swirl <blauwirbel at gmail.com>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
    Tested-by: Richard Henderson <rth at twiddle.net>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/default-configs/sparc64-softmmu.mak b/default-configs/sparc64-softmmu.mak
index c9a36c1..03e8b42 100644
--- a/default-configs/sparc64-softmmu.mak
+++ b/default-configs/sparc64-softmmu.mak
@@ -6,7 +6,6 @@ CONFIG_M48T59=y
 CONFIG_PTIMER=y
 CONFIG_VGA=y
 CONFIG_VGA_PCI=y
-CONFIG_VGA_CIRRUS=y
 CONFIG_SERIAL=y
 CONFIG_PARALLEL=y
 CONFIG_PCKBD=y
commit 8a527317059bb6ece8a01a8d5be92b5452fa5ad0
Merge: 9bca816... 8b279a6...
Author: Blue Swirl <blauwirbel at gmail.com>
Date:   Sat Oct 27 14:21:37 2012 +0000

    Merge branch 'target-arm.for-upstream' of git://git.linaro.org/people/pmaydell/qemu-arm
    
    * 'target-arm.for-upstream' of git://git.linaro.org/people/pmaydell/qemu-arm:
      target-arm: Remove out of date FIXME regarding saturating arithmetic
      target-arm: Implement abs_i32 inline rather than as a helper
      target-arm: Use TCG operation for Neon 64 bit negation
      arm-semi.c: Handle get/put_user() failure accessing arguments

commit 9bca81624ef9299b9a06013fd29cd6899079aab4
Author: Bruce Rogers <brogers at suse.com>
Date:   Mon Aug 20 12:45:08 2012 -0600

    configure: avoid compiler warning in pipe2 detection
    
    When building qemu-kvm for openSUSE:Factory, I am getting a
    warning in the pipe2 detection performed by configure, which
    prevents using --enable-werror.
    
    Change detection code to use return value of pipe2.
    
    Signed-off-by: Bruce Rogers <brogers at suse.com>
    Reviewed-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/configure b/configure
index 34ba9c7..37f712d 100755
--- a/configure
+++ b/configure
@@ -2399,8 +2399,7 @@ cat > $TMPC << EOF
 int main(void)
 {
     int pipefd[2];
-    pipe2(pipefd, O_CLOEXEC);
-    return 0;
+    return pipe2(pipefd, O_CLOEXEC);
 }
 EOF
 if compile_prog "" "" ; then
commit c1556a812a16023894e53c7b2215929741a5bf17
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Sun Oct 14 21:00:39 2012 +0100

    configure: Disable (clang) initializer-overrides warnings
    
    Disable clang's initializer-overrides warnings, as QEMU makes significant
    use of the pattern of initializing an array with a range-based default
    entry like
        [0 ... 0x1ff] = { GPIO_NONE, 0 }
    followed by specific entries which override that default, and clang
    would otherwise warn "initializer overrides prior initialization of
    this subobject" when it encountered the specific entry.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>
    Signed-off-by: Blue Swirl <blauwirbel at gmail.com>

diff --git a/configure b/configure
index fa5657f..34ba9c7 100755
--- a/configure
+++ b/configure
@@ -1160,6 +1160,7 @@ gcc_flags="-Wold-style-declaration -Wold-style-definition -Wtype-limits"
 gcc_flags="-Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers $gcc_flags"
 gcc_flags="-Wmissing-include-dirs -Wempty-body -Wnested-externs $gcc_flags"
 gcc_flags="-fstack-protector-all -Wendif-labels $gcc_flags"
+gcc_flags="-Wno-initializer-overrides $gcc_flags"
 # Note that we do not add -Werror to gcc_flags here, because that would
 # enable it for all configure tests. If a configure test failed due
 # to -Werror this would just silently disable some features,
commit 0ebfb144e8ad3f2da436d630fdcc5aa9ab646341
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Wed Oct 24 16:19:21 2012 +0200

    xhci: fix usb name in caps
    
    Used to be "UTB" not "USB".
    
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 25b04cd..7b65741 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -2430,7 +2430,7 @@ static uint64_t xhci_cap_read(void *ptr, hwaddr reg, unsigned size)
         ret = 0x02000402; /* USB 2.0 */
         break;
     case 0x24: /* Supported Protocol:04 */
-        ret = 0x20425455; /* "USB " */
+        ret = 0x20425355; /* "USB " */
         break;
     case 0x28: /* Supported Protocol:08 */
         ret = 0x00000001 | (xhci->numports_2<<8);
@@ -2442,7 +2442,7 @@ static uint64_t xhci_cap_read(void *ptr, hwaddr reg, unsigned size)
         ret = 0x03000002; /* USB 3.0 */
         break;
     case 0x34: /* Supported Protocol:04 */
-        ret = 0x20425455; /* "USB " */
+        ret = 0x20425355; /* "USB " */
         break;
     case 0x38: /* Supported Protocol:08 */
         ret = 0x00000000 | (xhci->numports_2+1) | (xhci->numports_3<<8);
commit 91062ae00fd4a88a40c28288e8ac7fe50ee83adf
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Wed Oct 24 11:49:30 2012 +0200

    xhci: make number of interrupters and slots configurable
    
    Add properties to tweak the numbers of available interrupters and slots.
    
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index bd8d4a5..25b04cd 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -417,6 +417,8 @@ struct XHCIState {
     /* properties */
     uint32_t numports_2;
     uint32_t numports_3;
+    uint32_t numintrs;
+    uint32_t numslots;
     uint32_t flags;
 
     /* Operational Registers */
@@ -816,8 +818,8 @@ static void xhci_event(XHCIState *xhci, XHCIEvent *event, int v)
     dma_addr_t erdp;
     unsigned int dp_idx;
 
-    if (v >= MAXINTRS) {
-        DPRINTF("intr nr out of range (%d >= %d)\n", v, MAXINTRS);
+    if (v >= xhci->numintrs) {
+        DPRINTF("intr nr out of range (%d >= %d)\n", v, xhci->numintrs);
         return;
     }
     intr = &xhci->intr[v];
@@ -1043,7 +1045,7 @@ static TRBCCode xhci_enable_ep(XHCIState *xhci, unsigned int slotid,
     int i;
 
     trace_usb_xhci_ep_enable(slotid, epid);
-    assert(slotid >= 1 && slotid <= MAXSLOTS);
+    assert(slotid >= 1 && slotid <= xhci->numslots);
     assert(epid >= 1 && epid <= 31);
 
     slot = &xhci->slots[slotid-1];
@@ -1121,7 +1123,7 @@ static int xhci_ep_nuke_xfers(XHCIState *xhci, unsigned int slotid,
     XHCISlot *slot;
     XHCIEPContext *epctx;
     int i, xferi, killed = 0;
-    assert(slotid >= 1 && slotid <= MAXSLOTS);
+    assert(slotid >= 1 && slotid <= xhci->numslots);
     assert(epid >= 1 && epid <= 31);
 
     DPRINTF("xhci_ep_nuke_xfers(%d, %d)\n", slotid, epid);
@@ -1149,7 +1151,7 @@ static TRBCCode xhci_disable_ep(XHCIState *xhci, unsigned int slotid,
     XHCIEPContext *epctx;
 
     trace_usb_xhci_ep_disable(slotid, epid);
-    assert(slotid >= 1 && slotid <= MAXSLOTS);
+    assert(slotid >= 1 && slotid <= xhci->numslots);
     assert(epid >= 1 && epid <= 31);
 
     slot = &xhci->slots[slotid-1];
@@ -1179,7 +1181,7 @@ static TRBCCode xhci_stop_ep(XHCIState *xhci, unsigned int slotid,
     XHCIEPContext *epctx;
 
     trace_usb_xhci_ep_stop(slotid, epid);
-    assert(slotid >= 1 && slotid <= MAXSLOTS);
+    assert(slotid >= 1 && slotid <= xhci->numslots);
 
     if (epid < 1 || epid > 31) {
         fprintf(stderr, "xhci: bad ep %d\n", epid);
@@ -1213,7 +1215,7 @@ static TRBCCode xhci_reset_ep(XHCIState *xhci, unsigned int slotid,
     USBDevice *dev;
 
     trace_usb_xhci_ep_reset(slotid, epid);
-    assert(slotid >= 1 && slotid <= MAXSLOTS);
+    assert(slotid >= 1 && slotid <= xhci->numslots);
 
     if (epid < 1 || epid > 31) {
         fprintf(stderr, "xhci: bad ep %d\n", epid);
@@ -1263,7 +1265,7 @@ static TRBCCode xhci_set_ep_dequeue(XHCIState *xhci, unsigned int slotid,
     XHCIEPContext *epctx;
     dma_addr_t dequeue;
 
-    assert(slotid >= 1 && slotid <= MAXSLOTS);
+    assert(slotid >= 1 && slotid <= xhci->numslots);
 
     if (epid < 1 || epid > 31) {
         fprintf(stderr, "xhci: bad ep %d\n", epid);
@@ -1667,7 +1669,7 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, unsigned int epid
     int i;
 
     trace_usb_xhci_ep_kick(slotid, epid);
-    assert(slotid >= 1 && slotid <= MAXSLOTS);
+    assert(slotid >= 1 && slotid <= xhci->numslots);
     assert(epid >= 1 && epid <= 31);
 
     if (!xhci->slots[slotid-1].enabled) {
@@ -1787,7 +1789,7 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, unsigned int epid
 static TRBCCode xhci_enable_slot(XHCIState *xhci, unsigned int slotid)
 {
     trace_usb_xhci_slot_enable(slotid);
-    assert(slotid >= 1 && slotid <= MAXSLOTS);
+    assert(slotid >= 1 && slotid <= xhci->numslots);
     xhci->slots[slotid-1].enabled = 1;
     xhci->slots[slotid-1].uport = NULL;
     memset(xhci->slots[slotid-1].eps, 0, sizeof(XHCIEPContext*)*31);
@@ -1800,7 +1802,7 @@ static TRBCCode xhci_disable_slot(XHCIState *xhci, unsigned int slotid)
     int i;
 
     trace_usb_xhci_slot_disable(slotid);
-    assert(slotid >= 1 && slotid <= MAXSLOTS);
+    assert(slotid >= 1 && slotid <= xhci->numslots);
 
     for (i = 1; i <= 31; i++) {
         if (xhci->slots[slotid-1].eps[i-1]) {
@@ -1852,7 +1854,7 @@ static TRBCCode xhci_address_slot(XHCIState *xhci, unsigned int slotid,
     TRBCCode res;
 
     trace_usb_xhci_slot_address(slotid);
-    assert(slotid >= 1 && slotid <= MAXSLOTS);
+    assert(slotid >= 1 && slotid <= xhci->numslots);
 
     dcbaap = xhci_addr64(xhci->dcbaap_low, xhci->dcbaap_high);
     pci_dma_read(&xhci->pci_dev, dcbaap + 8*slotid, &poctx, sizeof(poctx));
@@ -1891,7 +1893,7 @@ static TRBCCode xhci_address_slot(XHCIState *xhci, unsigned int slotid,
         return CC_USB_TRANSACTION_ERROR;
     }
 
-    for (i = 0; i < MAXSLOTS; i++) {
+    for (i = 0; i < xhci->numslots; i++) {
         if (xhci->slots[i].uport == uport) {
             fprintf(stderr, "xhci: port %s already assigned to slot %d\n",
                     uport->path, i+1);
@@ -1940,7 +1942,7 @@ static TRBCCode xhci_configure_slot(XHCIState *xhci, unsigned int slotid,
     TRBCCode res;
 
     trace_usb_xhci_slot_configure(slotid);
-    assert(slotid >= 1 && slotid <= MAXSLOTS);
+    assert(slotid >= 1 && slotid <= xhci->numslots);
 
     ictx = xhci_mask64(pictx);
     octx = xhci->slots[slotid-1].ctx;
@@ -2028,7 +2030,7 @@ static TRBCCode xhci_evaluate_slot(XHCIState *xhci, unsigned int slotid,
     uint32_t slot_ctx[4];
 
     trace_usb_xhci_slot_evaluate(slotid);
-    assert(slotid >= 1 && slotid <= MAXSLOTS);
+    assert(slotid >= 1 && slotid <= xhci->numslots);
 
     ictx = xhci_mask64(pictx);
     octx = xhci->slots[slotid-1].ctx;
@@ -2091,7 +2093,7 @@ static TRBCCode xhci_reset_slot(XHCIState *xhci, unsigned int slotid)
     int i;
 
     trace_usb_xhci_slot_reset(slotid);
-    assert(slotid >= 1 && slotid <= MAXSLOTS);
+    assert(slotid >= 1 && slotid <= xhci->numslots);
 
     octx = xhci->slots[slotid-1].ctx;
 
@@ -2117,7 +2119,7 @@ static unsigned int xhci_get_slot(XHCIState *xhci, XHCIEvent *event, XHCITRB *tr
 {
     unsigned int slotid;
     slotid = (trb->control >> TRB_CR_SLOTID_SHIFT) & TRB_CR_SLOTID_MASK;
-    if (slotid < 1 || slotid > MAXSLOTS) {
+    if (slotid < 1 || slotid > xhci->numslots) {
         fprintf(stderr, "xhci: bad slot id %d\n", slotid);
         event->ccode = CC_TRB_ERROR;
         return 0;
@@ -2209,12 +2211,12 @@ static void xhci_process_commands(XHCIState *xhci)
         event.ptr = addr;
         switch (type) {
         case CR_ENABLE_SLOT:
-            for (i = 0; i < MAXSLOTS; i++) {
+            for (i = 0; i < xhci->numslots; i++) {
                 if (!xhci->slots[i].enabled) {
                     break;
                 }
             }
-            if (i >= MAXSLOTS) {
+            if (i >= xhci->numslots) {
                 fprintf(stderr, "xhci: no device slots available\n");
                 event.ccode = CC_NO_SLOTS_ERROR;
             } else {
@@ -2361,7 +2363,7 @@ static void xhci_reset(DeviceState *dev)
     xhci->config = 0;
     xhci->devaddr = 2;
 
-    for (i = 0; i < MAXSLOTS; i++) {
+    for (i = 0; i < xhci->numslots; i++) {
         xhci_disable_slot(xhci, i+1);
     }
 
@@ -2369,7 +2371,7 @@ static void xhci_reset(DeviceState *dev)
         xhci_update_port(xhci, xhci->ports + i, 0);
     }
 
-    for (i = 0; i < MAXINTRS; i++) {
+    for (i = 0; i < xhci->numintrs; i++) {
         xhci->intr[i].iman = 0;
         xhci->intr[i].imod = 0;
         xhci->intr[i].erstsz = 0;
@@ -2401,7 +2403,7 @@ static uint64_t xhci_cap_read(void *ptr, hwaddr reg, unsigned size)
         break;
     case 0x04: /* HCSPARAMS 1 */
         ret = ((xhci->numports_2+xhci->numports_3)<<24)
-            | (MAXINTRS<<8) | MAXSLOTS;
+            | (xhci->numintrs<<8) | xhci->numslots;
         break;
     case 0x08: /* HCSPARAMS 2 */
         ret = 0x0000000f;
@@ -2756,7 +2758,7 @@ static void xhci_doorbell_write(void *ptr, hwaddr reg,
                     (uint32_t)val);
         }
     } else {
-        if (reg > MAXSLOTS) {
+        if (reg > xhci->numslots) {
             fprintf(stderr, "xhci: bad doorbell %d\n", (int)reg);
         } else if (val > 31) {
             fprintf(stderr, "xhci: bad doorbell %d write: 0x%x\n",
@@ -2862,7 +2864,7 @@ static void xhci_child_detach(USBPort *uport, USBDevice *child)
     XHCIState *xhci = container_of(bus, XHCIState, bus);
     int i;
 
-    for (i = 0; i < MAXSLOTS; i++) {
+    for (i = 0; i < xhci->numslots; i++) {
         if (xhci->slots[i].uport == uport) {
             xhci->slots[i].uport = NULL;
         }
@@ -2882,7 +2884,7 @@ static int xhci_find_slotid(XHCIState *xhci, USBDevice *dev)
     XHCISlot *slot;
     int slotid;
 
-    for (slotid = 1; slotid <= MAXSLOTS; slotid++) {
+    for (slotid = 1; slotid <= xhci->numslots; slotid++) {
         slot = &xhci->slots[slotid-1];
         if (slot->devaddr == dev->addr) {
             return slotid;
@@ -2978,6 +2980,19 @@ static int usb_xhci_initfn(struct PCIDevice *dev)
 
     usb_xhci_init(xhci, &dev->qdev);
 
+    if (xhci->numintrs > MAXINTRS) {
+        xhci->numintrs = MAXINTRS;
+    }
+    if (xhci->numintrs < 1) {
+        xhci->numintrs = 1;
+    }
+    if (xhci->numslots > MAXSLOTS) {
+        xhci->numslots = MAXSLOTS;
+    }
+    if (xhci->numslots < 1) {
+        xhci->numslots = 1;
+    }
+
     xhci->mfwrap_timer = qemu_new_timer_ns(vm_clock, xhci_mfwrap_timer, xhci);
 
     xhci->irq = xhci->pci_dev.irq[0];
@@ -3014,10 +3029,10 @@ static int usb_xhci_initfn(struct PCIDevice *dev)
     assert(ret >= 0);
 
     if (xhci->flags & (1 << XHCI_FLAG_USE_MSI)) {
-        msi_init(&xhci->pci_dev, 0x70, MAXINTRS, true, false);
+        msi_init(&xhci->pci_dev, 0x70, xhci->numintrs, true, false);
     }
     if (xhci->flags & (1 << XHCI_FLAG_USE_MSI_X)) {
-        msix_init(&xhci->pci_dev, MAXINTRS,
+        msix_init(&xhci->pci_dev, xhci->numintrs,
                   &xhci->mem, 0, OFF_MSIX_TABLE,
                   &xhci->mem, 0, OFF_MSIX_PBA,
                   0x90);
@@ -3032,10 +3047,12 @@ static const VMStateDescription vmstate_xhci = {
 };
 
 static Property xhci_properties[] = {
-    DEFINE_PROP_BIT("msi",    XHCIState, flags, XHCI_FLAG_USE_MSI, true),
-    DEFINE_PROP_BIT("msix",   XHCIState, flags, XHCI_FLAG_USE_MSI_X, true),
-    DEFINE_PROP_UINT32("p2",  XHCIState, numports_2, 4),
-    DEFINE_PROP_UINT32("p3",  XHCIState, numports_3, 4),
+    DEFINE_PROP_BIT("msi",      XHCIState, flags, XHCI_FLAG_USE_MSI, true),
+    DEFINE_PROP_BIT("msix",     XHCIState, flags, XHCI_FLAG_USE_MSI_X, true),
+    DEFINE_PROP_UINT32("intrs", XHCIState, numintrs, MAXINTRS),
+    DEFINE_PROP_UINT32("slots", XHCIState, numslots, MAXSLOTS),
+    DEFINE_PROP_UINT32("p2",    XHCIState, numports_2, 4),
+    DEFINE_PROP_UINT32("p3",    XHCIState, numports_3, 4),
     DEFINE_PROP_END_OF_LIST(),
 };
 
commit e099ad4b7e9ca7debdd624a0d465a33198a6844f
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Wed Oct 24 09:38:08 2012 +0200

    xhci: allow disabling interrupters
    
    For secondary interrupters this is explicitly allowed in the specs.
    For the primary interrupter behavior is undefined, lets be friendly
    and allow disabling too.
    
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index d8d1226..bd8d4a5 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -964,6 +964,12 @@ static void xhci_er_reset(XHCIState *xhci, int v)
     XHCIInterrupter *intr = &xhci->intr[v];
     XHCIEvRingSeg seg;
 
+    if (intr->erstsz == 0) {
+        /* disabled */
+        intr->er_start = 0;
+        intr->er_size = 0;
+        return;
+    }
     /* cache the (sole) event ring segment location */
     if (intr->erstsz != 1) {
         fprintf(stderr, "xhci: invalid value for ERSTSZ: %d\n", intr->erstsz);
commit 3f973ee84ef51e448566c08ec5cfe746c9640269
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Wed Oct 24 10:14:16 2012 +0200

    xhci: flush endpoint context unconditinally
    
    Not updating the endpoint context in case the state didn't change is
    wrong.  Other context fields might have changed, for example the
    dequeue pointer in response to a CR_SET_TR_DEQUEUE command.
    
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 8345fa3..d8d1226 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -1009,9 +1009,6 @@ static void xhci_set_ep_state(XHCIState *xhci, XHCIEPContext *epctx,
                               uint32_t state)
 {
     uint32_t ctx[5];
-    if (epctx->state == state) {
-        return;
-    }
 
     pci_dma_read(&xhci->pci_dev, epctx->pctx, ctx, sizeof(ctx));
     ctx[0] &= ~EP_STATE_MASK;
commit 79a8af3509151dd1b650c996defdda13e02a7e38
Author: Gerd Hoffmann <kraxel at redhat.com>
Date:   Wed Oct 24 09:36:55 2012 +0200

    xhci: fix function name in error message
    
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index caa5f3e..8345fa3 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -2676,7 +2676,7 @@ static void xhci_runtime_write(void *ptr, hwaddr reg,
     trace_usb_xhci_runtime_write(reg, val);
 
     if (reg < 0x20) {
-        fprintf(stderr, "xhci_oper_write: reg 0x%x unimplemented\n", (int)reg);
+        fprintf(stderr, "%s: reg 0x%x unimplemented\n", __func__, (int)reg);
         return;
     }
 
commit 6fe30910abd7e0f4af69f8e522ccfe6b5125abbb
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:31:20 2012 +0200

    uhci: Use only one queue for ctrl endpoints
    
    ctrl endpoints use different pids for different phases of a control
    transfer, this patch makes us use only one queue for a ctrl ep, rather
    then 3.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index a9e06ef..b6b972f 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -167,8 +167,13 @@ static void uhci_queue_fill(UHCIQueue *q, UHCI_TD *td);
 
 static inline int32_t uhci_queue_token(UHCI_TD *td)
 {
-    /* covers ep, dev, pid -> identifies the endpoint */
-    return td->token & 0x7ffff;
+    if ((td->token & (0xf << 15)) == 0) {
+        /* ctrl ep, cover ep and dev, not pid! */
+        return td->token & 0x7ff00;
+    } else {
+        /* covers ep, dev, pid -> identifies the endpoint */
+        return td->token & 0x7ffff;
+    }
 }
 
 static UHCIQueue *uhci_queue_new(UHCIState *s, uint32_t qh_addr, UHCI_TD *td,
commit 8928c9c43df1a0eeda1ae2e9582beb34dce49330
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:31:19 2012 +0200

    uhci: Retry to fill the queue while waiting for td completion
    
    If the guest is using multiple transfers to try and keep the usb bus busy /
    used at maximum efficiency, currently we would see / do the following:
    
    1) submit transfer 1 to the device
    2) submit transfer 2 to the device
    3) report transfer 1 completion to guest
    4) report transfer 2 completion to guest
    5) submit transfer 1 to the device
    6) report transfer 1 completion to guest
    7) submit transfer 2 to the device
    8) report transfer 2 completion to guest
    etc.
    
    So after the initial submission we would effectively only have 1 transfer
    in flight, rather then 2. This is caused by us not checking the queue for
    addition of new transfers by the guest (ie the resubmission of a recently
    finished transfer), while waiting for a pending transfer to complete.
    This patch does add a check for this, changing the sequence to:
    
    1) submit transfer 1 to the device
    2) submit transfer 2 to the device
    3) report transfer 1 completion to guest
    4) submit transfer 1 to the device
    5) report transfer 2 completion to guest
    6) submit transfer 2 to the device
    etc.
    
    Thus keeping 2 transfers in flight (most of the time, and always 1),
    as intended by the guest.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index beeb3fd..a9e06ef 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -110,7 +110,7 @@ struct UHCIQueue {
     UHCIState *uhci;
     USBEndpoint *ep;
     QTAILQ_ENTRY(UHCIQueue) next;
-    QTAILQ_HEAD(, UHCIAsync) asyncs;
+    QTAILQ_HEAD(asyncs_head, UHCIAsync) asyncs;
     int8_t    valid;
 };
 
@@ -843,15 +843,25 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q, uint32_t qh_addr,
     }
 
     if (async) {
-        if (!async->done)
-            return TD_RESULT_ASYNC_CONT;
         if (queuing) {
             /* we are busy filling the queue, we are not prepared
                to consume completed packages then, just leave them
                in async state */
             return TD_RESULT_ASYNC_CONT;
         }
+        if (!async->done) {
+            UHCI_TD last_td;
+            UHCIAsync *last = QTAILQ_LAST(&async->queue->asyncs, asyncs_head);
+            /*
+             * While we are waiting for the current td to complete, the guest
+             * may have added more tds to the queue. Note we re-read the td
+             * rather then caching it, as we want to see guest made changes!
+             */
+            uhci_read_td(s, &last_td, last->td_addr);
+            uhci_queue_fill(async->queue, &last_td);
 
+            return TD_RESULT_ASYNC_CONT;
+        }
         uhci_async_unlink(async);
         goto done;
     }
commit 3905097ea8ea0e1b3dd1556f3ef74a123fe8622a
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:31:18 2012 +0200

    uhci: Always mark a queue valid when we encounter it
    
    Before this patch we would not mark a queue valid when its head was a
    non-active td. This causes us to misbehave in the following scenario:
    
    1) queue with multiple input transfers queued
    2) We hit some latency issue, causing qemu to get behind processing frames
    3) When qemu gets to run again, it notices the first transfer ends short,
       marking the head td non-active
    4) It now processes 32+ frames in a row without giving the guest a chance
       to run since it is behind
    5) valid is decreased to 0, causing the queue to get cancelled also cancelling
       already queued up further input transfers
    6) guest gets to run, notices the inactive td, cleanups up further tds
       from the short transfer, and lets the queue continue at the first td of
       the next input transfer
    7) we re-start the queue, issuing the second input transfer for the *second*
       time, and any data read by the first time we issued it has been lost
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 592ad8d..beeb3fd 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -183,6 +183,9 @@ static UHCIQueue *uhci_queue_new(UHCIState *s, uint32_t qh_addr, UHCI_TD *td,
     queue->ep = ep;
     QTAILQ_INIT(&queue->asyncs);
     QTAILQ_INSERT_HEAD(&s->queues, queue, next);
+    /* valid needs to be large enough to handle 10 frame delay
+     * for initial isochronous requests */
+    queue->valid = 32;
     trace_usb_uhci_queue_add(queue->token);
     return queue;
 }
@@ -819,6 +822,10 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q, uint32_t qh_addr,
         }
     }
 
+    if (q) {
+        q->valid = 32;
+    }
+
     /* Is active ? */
     if (!(td->ctrl & TD_CTRL_ACTIVE)) {
         if (async) {
@@ -836,9 +843,6 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q, uint32_t qh_addr,
     }
 
     if (async) {
-        /* Already submitted */
-        async->queue->valid = 32;
-
         if (!async->done)
             return TD_RESULT_ASYNC_CONT;
         if (queuing) {
@@ -860,11 +864,6 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q, uint32_t qh_addr,
     }
     async = uhci_async_alloc(q, td_addr);
 
-    /* valid needs to be large enough to handle 10 frame delay
-     * for initial isochronous requests
-     */
-    async->queue->valid = 32;
-
     max_len = ((td->token >> 21) + 1) & 0x7ff;
     spd = (pid == USB_TOKEN_IN && (td->ctrl & TD_CTRL_SPD) != 0);
     usb_packet_setup(&async->packet, pid, q->ep, td_addr, spd,
commit 420ca987d5d14fabacdec86c568832d6b0056ca3
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:31:17 2012 +0200

    uhci: When the guest marks a pending td non-active, cancel the queue
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index c4f2f98..592ad8d 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -821,6 +821,10 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q, uint32_t qh_addr,
 
     /* Is active ? */
     if (!(td->ctrl & TD_CTRL_ACTIVE)) {
+        if (async) {
+            /* Guest marked a pending td non-active, cancel the queue */
+            uhci_queue_free(async->queue, "pending td non-active");
+        }
         /*
          * ehci11d spec page 22: "Even if the Active bit in the TD is already
          * cleared when the TD is fetched ... an IOC interrupt is generated"
commit 8c75a899f85dda9f8305d25f0cc453a79115fd90
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:31:16 2012 +0200

    uhci: Detect guest td re-use
    
    A td can be reused by the guest in a different queue, before we notice
    the original queue has been unlinked. So search for tds by addr only, detect
    guest td reuse, and cancel the original queue, this is necessary to keep our
    packet ids unique.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 0984bee..c4f2f98 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -319,28 +319,18 @@ static void uhci_async_cancel_all(UHCIState *s)
     }
 }
 
-static UHCIAsync *uhci_async_find_td(UHCIState *s, uint32_t td_addr,
-                                     UHCI_TD *td)
+static UHCIAsync *uhci_async_find_td(UHCIState *s, uint32_t td_addr)
 {
-    uint32_t token = uhci_queue_token(td);
     UHCIQueue *queue;
     UHCIAsync *async;
 
     QTAILQ_FOREACH(queue, &s->queues, next) {
-        if (queue->token == token) {
-            break;
-        }
-    }
-    if (queue == NULL) {
-        return NULL;
-    }
-
-    QTAILQ_FOREACH(async, &queue->asyncs, next) {
-        if (async->td_addr == td_addr) {
-            return async;
+        QTAILQ_FOREACH(async, &queue->asyncs, next) {
+            if (async->td_addr == td_addr) {
+                return async;
+            }
         }
     }
-
     return NULL;
 }
 
@@ -805,11 +795,21 @@ out:
 static int uhci_handle_td(UHCIState *s, UHCIQueue *q, uint32_t qh_addr,
                           UHCI_TD *td, uint32_t td_addr, uint32_t *int_mask)
 {
-    UHCIAsync *async;
     int len = 0, max_len;
     bool spd;
     bool queuing = (q != NULL);
     uint8_t pid = td->token & 0xff;
+    UHCIAsync *async = uhci_async_find_td(s, td_addr);
+
+    if (async) {
+        if (uhci_queue_verify(async->queue, qh_addr, td, td_addr, queuing)) {
+            assert(q == NULL || q == async->queue);
+            q = async->queue;
+        } else {
+            uhci_queue_free(async->queue, "guest re-used pending td");
+            async = NULL;
+        }
+    }
 
     if (q == NULL) {
         q = uhci_queue_find(s, td);
@@ -831,7 +831,6 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q, uint32_t qh_addr,
         return TD_RESULT_NEXT_QH;
     }
 
-    async = uhci_async_find_td(s, td_addr, td);
     if (async) {
         /* Already submitted */
         async->queue->valid = 32;
commit 66a08cbe6ad1aebec8eecf58b3ba042e19dd1649
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:31:15 2012 +0200

    uhci: Verify queue has not been changed by guest
    
    According to the spec a guest can unlink a qh, and then as soon as frindex
    has changed by 1 since the unlink, assume it is idle and re-use it. However
    for various reasons, we cannot simply consider a qh as unlinked if we've not
    seen it for 1 frame. This means that it is possible for a guest to re-use /
    restart the queue while we still see its old state. This patch adds a safety
    check for this, and "early" retires queues when they were changed by the guest.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index a8b74bd..0984bee 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -105,6 +105,7 @@ struct UHCIAsync {
 };
 
 struct UHCIQueue {
+    uint32_t  qh_addr;
     uint32_t  token;
     UHCIState *uhci;
     USBEndpoint *ep;
@@ -170,20 +171,15 @@ static inline int32_t uhci_queue_token(UHCI_TD *td)
     return td->token & 0x7ffff;
 }
 
-static UHCIQueue *uhci_queue_get(UHCIState *s, UHCI_TD *td, USBEndpoint *ep)
+static UHCIQueue *uhci_queue_new(UHCIState *s, uint32_t qh_addr, UHCI_TD *td,
+                                 USBEndpoint *ep)
 {
-    uint32_t token = uhci_queue_token(td);
     UHCIQueue *queue;
 
-    QTAILQ_FOREACH(queue, &s->queues, next) {
-        if (queue->token == token) {
-            return queue;
-        }
-    }
-
     queue = g_new0(UHCIQueue, 1);
     queue->uhci = s;
-    queue->token = token;
+    queue->qh_addr = qh_addr;
+    queue->token = uhci_queue_token(td);
     queue->ep = ep;
     QTAILQ_INIT(&queue->asyncs);
     QTAILQ_INSERT_HEAD(&s->queues, queue, next);
@@ -191,7 +187,7 @@ static UHCIQueue *uhci_queue_get(UHCIState *s, UHCI_TD *td, USBEndpoint *ep)
     return queue;
 }
 
-static void uhci_queue_free(UHCIQueue *queue)
+static void uhci_queue_free(UHCIQueue *queue, const char *reason)
 {
     UHCIState *s = queue->uhci;
     UHCIAsync *async;
@@ -201,11 +197,35 @@ static void uhci_queue_free(UHCIQueue *queue)
         uhci_async_cancel(async);
     }
 
-    trace_usb_uhci_queue_del(queue->token);
+    trace_usb_uhci_queue_del(queue->token, reason);
     QTAILQ_REMOVE(&s->queues, queue, next);
     g_free(queue);
 }
 
+static UHCIQueue *uhci_queue_find(UHCIState *s, UHCI_TD *td)
+{
+    uint32_t token = uhci_queue_token(td);
+    UHCIQueue *queue;
+
+    QTAILQ_FOREACH(queue, &s->queues, next) {
+        if (queue->token == token) {
+            return queue;
+        }
+    }
+    return NULL;
+}
+
+static bool uhci_queue_verify(UHCIQueue *queue, uint32_t qh_addr, UHCI_TD *td,
+                              uint32_t td_addr, bool queuing)
+{
+    UHCIAsync *first = QTAILQ_FIRST(&queue->asyncs);
+
+    return queue->qh_addr == qh_addr &&
+           queue->token == uhci_queue_token(td) &&
+           (queuing || !(td->ctrl & TD_CTRL_ACTIVE) || first == NULL ||
+            first->td_addr == td_addr);
+}
+
 static UHCIAsync *uhci_async_alloc(UHCIQueue *queue, uint32_t td_addr)
 {
     UHCIAsync *async = g_new0(UHCIAsync, 1);
@@ -274,7 +294,7 @@ static void uhci_async_validate_end(UHCIState *s)
 
     QTAILQ_FOREACH_SAFE(queue, &s->queues, next, n) {
         if (!queue->valid) {
-            uhci_queue_free(queue);
+            uhci_queue_free(queue, "validate-end");
         }
     }
 }
@@ -295,7 +315,7 @@ static void uhci_async_cancel_all(UHCIState *s)
     UHCIQueue *queue, *nq;
 
     QTAILQ_FOREACH_SAFE(queue, &s->queues, next, nq) {
-        uhci_queue_free(queue);
+        uhci_queue_free(queue, "cancel-all");
     }
 }
 
@@ -782,7 +802,7 @@ out:
     return err;
 }
 
-static int uhci_handle_td(UHCIState *s, UHCIQueue *q,
+static int uhci_handle_td(UHCIState *s, UHCIQueue *q, uint32_t qh_addr,
                           UHCI_TD *td, uint32_t td_addr, uint32_t *int_mask)
 {
     UHCIAsync *async;
@@ -791,6 +811,14 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q,
     bool queuing = (q != NULL);
     uint8_t pid = td->token & 0xff;
 
+    if (q == NULL) {
+        q = uhci_queue_find(s, td);
+        if (q && !uhci_queue_verify(q, qh_addr, td, td_addr, queuing)) {
+            uhci_queue_free(q, "guest re-used qh");
+            q = NULL;
+        }
+    }
+
     /* Is active ? */
     if (!(td->ctrl & TD_CTRL_ACTIVE)) {
         /*
@@ -825,7 +853,7 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q,
     if (q == NULL) {
         USBDevice *dev = uhci_find_device(s, (td->token >> 8) & 0x7f);
         USBEndpoint *ep = usb_ep_get(dev, pid, (td->token >> 15) & 0xf);
-        q = uhci_queue_get(s, td, ep);
+        q = uhci_queue_new(s, qh_addr, td, ep);
     }
     async = uhci_async_alloc(q, td_addr);
 
@@ -954,7 +982,7 @@ static void uhci_queue_fill(UHCIQueue *q, UHCI_TD *td)
             break;
         }
         trace_usb_uhci_td_queue(plink & ~0xf, ptd.ctrl, ptd.token);
-        ret = uhci_handle_td(q->uhci, q, &ptd, plink, &int_mask);
+        ret = uhci_handle_td(q->uhci, q, q->qh_addr, &ptd, plink, &int_mask);
         if (ret == TD_RESULT_ASYNC_CONT) {
             break;
         }
@@ -1035,7 +1063,7 @@ static void uhci_process_frame(UHCIState *s)
         trace_usb_uhci_td_load(curr_qh & ~0xf, link & ~0xf, td.ctrl, td.token);
 
         old_td_ctrl = td.ctrl;
-        ret = uhci_handle_td(s, NULL, &td, link, &int_mask);
+        ret = uhci_handle_td(s, NULL, curr_qh, &td, link, &int_mask);
         if (old_td_ctrl != td.ctrl) {
             /* update the status bits of the TD */
             val = cpu_to_le32(td.ctrl);
diff --git a/trace-events b/trace-events
index e2d4580..ed136a5 100644
--- a/trace-events
+++ b/trace-events
@@ -287,7 +287,7 @@ usb_uhci_mmio_writew(uint32_t addr, uint32_t val) "addr 0x%04x, val 0x%04x"
 usb_uhci_mmio_readl(uint32_t addr, uint32_t val) "addr 0x%04x, ret 0x%08x"
 usb_uhci_mmio_writel(uint32_t addr, uint32_t val) "addr 0x%04x, val 0x%08x"
 usb_uhci_queue_add(uint32_t token) "token 0x%x"
-usb_uhci_queue_del(uint32_t token) "token 0x%x"
+usb_uhci_queue_del(uint32_t token, const char *reason) "token 0x%x: %s"
 usb_uhci_packet_add(uint32_t token, uint32_t addr) "token 0x%x, td 0x%x"
 usb_uhci_packet_link_async(uint32_t token, uint32_t addr) "token 0x%x, td 0x%x"
 usb_uhci_packet_unlink_async(uint32_t token, uint32_t addr) "token 0x%x, td 0x%x"
commit 5ad23e873c858292dc58b9296261365312b8f683
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:31:14 2012 +0200

    uhci: Immediately free queues on device disconnect
    
    There is no need to just cancel any in-flight packets, and then wait
    for validate-end to clean things up, we can simply clean things up
    immediately on device removal.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 8214a21..a8b74bd 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -281,16 +281,11 @@ static void uhci_async_validate_end(UHCIState *s)
 
 static void uhci_async_cancel_device(UHCIState *s, USBDevice *dev)
 {
-    UHCIQueue *queue;
-    UHCIAsync *curr, *n;
+    UHCIQueue *queue, *n;
 
-    QTAILQ_FOREACH(queue, &s->queues, next) {
-        QTAILQ_FOREACH_SAFE(curr, &queue->asyncs, next, n) {
-            if (!usb_packet_is_inflight(&curr->packet) ||
-                curr->packet.ep->dev != dev) {
-                continue;
-            }
-            uhci_async_cancel(curr);
+    QTAILQ_FOREACH_SAFE(queue, &s->queues, next, n) {
+        if (queue->ep->dev == dev) {
+            uhci_queue_free(queue, "cancel-device");
         }
     }
 }
commit 11d15e402beec122fb11104e36c35d1a46f650ff
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:31:13 2012 +0200

    uhci: Store ep in UHCIQueue
    
    This avoids the need to repeatedly lookup the device, and ep.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 2bbc6fb..8214a21 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -107,6 +107,7 @@ struct UHCIAsync {
 struct UHCIQueue {
     uint32_t  token;
     UHCIState *uhci;
+    USBEndpoint *ep;
     QTAILQ_ENTRY(UHCIQueue) next;
     QTAILQ_HEAD(, UHCIAsync) asyncs;
     int8_t    valid;
@@ -161,7 +162,7 @@ typedef struct UHCI_QH {
 } UHCI_QH;
 
 static void uhci_async_cancel(UHCIAsync *async);
-static void uhci_queue_fill(UHCIQueue *q, UHCI_TD *td, struct USBEndpoint *ep);
+static void uhci_queue_fill(UHCIQueue *q, UHCI_TD *td);
 
 static inline int32_t uhci_queue_token(UHCI_TD *td)
 {
@@ -169,7 +170,7 @@ static inline int32_t uhci_queue_token(UHCI_TD *td)
     return td->token & 0x7ffff;
 }
 
-static UHCIQueue *uhci_queue_get(UHCIState *s, UHCI_TD *td)
+static UHCIQueue *uhci_queue_get(UHCIState *s, UHCI_TD *td, USBEndpoint *ep)
 {
     uint32_t token = uhci_queue_token(td);
     UHCIQueue *queue;
@@ -183,6 +184,7 @@ static UHCIQueue *uhci_queue_get(UHCIState *s, UHCI_TD *td)
     queue = g_new0(UHCIQueue, 1);
     queue->uhci = s;
     queue->token = token;
+    queue->ep = ep;
     QTAILQ_INIT(&queue->asyncs);
     QTAILQ_INSERT_HEAD(&s->queues, queue, next);
     trace_usb_uhci_queue_add(queue->token);
@@ -790,11 +792,9 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q,
 {
     UHCIAsync *async;
     int len = 0, max_len;
-    uint8_t pid;
     bool spd;
-    USBDevice *dev;
-    USBEndpoint *ep;
     bool queuing = (q != NULL);
+    uint8_t pid = td->token & 0xff;
 
     /* Is active ? */
     if (!(td->ctrl & TD_CTRL_ACTIVE)) {
@@ -828,7 +828,9 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q,
 
     /* Allocate new packet */
     if (q == NULL) {
-        q = uhci_queue_get(s, td);
+        USBDevice *dev = uhci_find_device(s, (td->token >> 8) & 0x7f);
+        USBEndpoint *ep = usb_ep_get(dev, pid, (td->token >> 15) & 0xf);
+        q = uhci_queue_get(s, td, ep);
     }
     async = uhci_async_alloc(q, td_addr);
 
@@ -838,12 +840,8 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q,
     async->queue->valid = 32;
 
     max_len = ((td->token >> 21) + 1) & 0x7ff;
-    pid = td->token & 0xff;
     spd = (pid == USB_TOKEN_IN && (td->ctrl & TD_CTRL_SPD) != 0);
-
-    dev = uhci_find_device(s, (td->token >> 8) & 0x7f);
-    ep = usb_ep_get(dev, pid, (td->token >> 15) & 0xf);
-    usb_packet_setup(&async->packet, pid, ep, td_addr, spd,
+    usb_packet_setup(&async->packet, pid, q->ep, td_addr, spd,
                      (td->ctrl & TD_CTRL_IOC) != 0);
     qemu_sglist_add(&async->sgl, td->buffer, max_len);
     usb_packet_map(&async->packet, &async->sgl);
@@ -851,13 +849,13 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q,
     switch(pid) {
     case USB_TOKEN_OUT:
     case USB_TOKEN_SETUP:
-        len = usb_handle_packet(dev, &async->packet);
+        len = usb_handle_packet(q->ep->dev, &async->packet);
         if (len >= 0)
             len = max_len;
         break;
 
     case USB_TOKEN_IN:
-        len = usb_handle_packet(dev, &async->packet);
+        len = usb_handle_packet(q->ep->dev, &async->packet);
         break;
 
     default:
@@ -872,7 +870,7 @@ static int uhci_handle_td(UHCIState *s, UHCIQueue *q,
     if (len == USB_RET_ASYNC) {
         uhci_async_link(async);
         if (!queuing) {
-            uhci_queue_fill(q, td, ep);
+            uhci_queue_fill(q, td);
         }
         return TD_RESULT_ASYNC_START;
     }
@@ -945,7 +943,7 @@ static int qhdb_insert(QhDb *db, uint32_t addr)
     return 0;
 }
 
-static void uhci_queue_fill(UHCIQueue *q, UHCI_TD *td, struct USBEndpoint *ep)
+static void uhci_queue_fill(UHCIQueue *q, UHCI_TD *td)
 {
     uint32_t int_mask = 0;
     uint32_t plink = td->link;
@@ -969,7 +967,7 @@ static void uhci_queue_fill(UHCIQueue *q, UHCI_TD *td, struct USBEndpoint *ep)
         assert(int_mask == 0);
         plink = ptd.link;
     }
-    usb_device_flush_ep_queue(ep->dev, ep);
+    usb_device_flush_ep_queue(q->ep->dev, q->ep);
 }
 
 static void uhci_process_frame(UHCIState *s)
commit a4f30cd766888e69e1d75cd87d251c04e829688e
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:31:12 2012 +0200

    uhci: Make uhci_fill_queue() actually operate on an UHCIQueue
    
    And move its calling point to handle_td, this removes the ep_ret ugliness,
    and prepates the way for further cleanups in the follow-up patches in this
    patch-set.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 6d2db7f..2bbc6fb 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -161,6 +161,7 @@ typedef struct UHCI_QH {
 } UHCI_QH;
 
 static void uhci_async_cancel(UHCIAsync *async);
+static void uhci_queue_fill(UHCIQueue *q, UHCI_TD *td, struct USBEndpoint *ep);
 
 static inline int32_t uhci_queue_token(UHCI_TD *td)
 {
@@ -784,9 +785,8 @@ out:
     return err;
 }
 
-static int uhci_handle_td(UHCIState *s, uint32_t td_addr, UHCI_TD *td,
-                          uint32_t *int_mask, bool queuing,
-                          struct USBEndpoint **ep_ret)
+static int uhci_handle_td(UHCIState *s, UHCIQueue *q,
+                          UHCI_TD *td, uint32_t td_addr, uint32_t *int_mask)
 {
     UHCIAsync *async;
     int len = 0, max_len;
@@ -794,6 +794,7 @@ static int uhci_handle_td(UHCIState *s, uint32_t td_addr, UHCI_TD *td,
     bool spd;
     USBDevice *dev;
     USBEndpoint *ep;
+    bool queuing = (q != NULL);
 
     /* Is active ? */
     if (!(td->ctrl & TD_CTRL_ACTIVE)) {
@@ -826,7 +827,10 @@ static int uhci_handle_td(UHCIState *s, uint32_t td_addr, UHCI_TD *td,
     }
 
     /* Allocate new packet */
-    async = uhci_async_alloc(uhci_queue_get(s, td), td_addr);
+    if (q == NULL) {
+        q = uhci_queue_get(s, td);
+    }
+    async = uhci_async_alloc(q, td_addr);
 
     /* valid needs to be large enough to handle 10 frame delay
      * for initial isochronous requests
@@ -839,9 +843,6 @@ static int uhci_handle_td(UHCIState *s, uint32_t td_addr, UHCI_TD *td,
 
     dev = uhci_find_device(s, (td->token >> 8) & 0x7f);
     ep = usb_ep_get(dev, pid, (td->token >> 15) & 0xf);
-    if (ep_ret) {
-        *ep_ret = ep;
-    }
     usb_packet_setup(&async->packet, pid, ep, td_addr, spd,
                      (td->ctrl & TD_CTRL_IOC) != 0);
     qemu_sglist_add(&async->sgl, td->buffer, max_len);
@@ -870,6 +871,9 @@ static int uhci_handle_td(UHCIState *s, uint32_t td_addr, UHCI_TD *td,
  
     if (len == USB_RET_ASYNC) {
         uhci_async_link(async);
+        if (!queuing) {
+            uhci_queue_fill(q, td, ep);
+        }
         return TD_RESULT_ASYNC_START;
     }
 
@@ -941,24 +945,23 @@ static int qhdb_insert(QhDb *db, uint32_t addr)
     return 0;
 }
 
-static void uhci_fill_queue(UHCIState *s, UHCI_TD *td, struct USBEndpoint *ep)
+static void uhci_queue_fill(UHCIQueue *q, UHCI_TD *td, struct USBEndpoint *ep)
 {
     uint32_t int_mask = 0;
     uint32_t plink = td->link;
-    uint32_t token = uhci_queue_token(td);
     UHCI_TD ptd;
     int ret;
 
     while (is_valid(plink)) {
-        uhci_read_td(s, &ptd, plink);
+        uhci_read_td(q->uhci, &ptd, plink);
         if (!(ptd.ctrl & TD_CTRL_ACTIVE)) {
             break;
         }
-        if (uhci_queue_token(&ptd) != token) {
+        if (uhci_queue_token(&ptd) != q->token) {
             break;
         }
         trace_usb_uhci_td_queue(plink & ~0xf, ptd.ctrl, ptd.token);
-        ret = uhci_handle_td(s, plink, &ptd, &int_mask, true, NULL);
+        ret = uhci_handle_td(q->uhci, q, &ptd, plink, &int_mask);
         if (ret == TD_RESULT_ASYNC_CONT) {
             break;
         }
@@ -973,7 +976,6 @@ static void uhci_process_frame(UHCIState *s)
 {
     uint32_t frame_addr, link, old_td_ctrl, val, int_mask;
     uint32_t curr_qh, td_count = 0;
-    struct USBEndpoint *curr_ep;
     int cnt, ret;
     UHCI_TD td;
     UHCI_QH qh;
@@ -1040,7 +1042,7 @@ static void uhci_process_frame(UHCIState *s)
         trace_usb_uhci_td_load(curr_qh & ~0xf, link & ~0xf, td.ctrl, td.token);
 
         old_td_ctrl = td.ctrl;
-        ret = uhci_handle_td(s, link, &td, &int_mask, false, &curr_ep);
+        ret = uhci_handle_td(s, NULL, &td, link, &int_mask);
         if (old_td_ctrl != td.ctrl) {
             /* update the status bits of the TD */
             val = cpu_to_le32(td.ctrl);
@@ -1059,7 +1061,6 @@ static void uhci_process_frame(UHCIState *s)
 
         case TD_RESULT_ASYNC_START:
             trace_usb_uhci_td_async(curr_qh & ~0xf, link & ~0xf);
-            uhci_fill_queue(s, &td, curr_ep);
             link = curr_qh ? qh.link : td.link;
             continue;
 
commit 963a68b54f8f4e5dcc7cc2c6ec8d047557ac5feb
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:31:11 2012 +0200

    uhci: Add uhci_read_td() helper function
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 7589a5b..6d2db7f 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -693,6 +693,15 @@ static USBDevice *uhci_find_device(UHCIState *s, uint8_t addr)
     return NULL;
 }
 
+static void uhci_read_td(UHCIState *s, UHCI_TD *td, uint32_t link)
+{
+    pci_dma_read(&s->dev, link & ~0xf, td, sizeof(*td));
+    le32_to_cpus(&td->link);
+    le32_to_cpus(&td->ctrl);
+    le32_to_cpus(&td->token);
+    le32_to_cpus(&td->buffer);
+}
+
 static int uhci_complete_td(UHCIState *s, UHCI_TD *td, UHCIAsync *async, uint32_t *int_mask)
 {
     int len = 0, max_len, err, ret;
@@ -941,11 +950,7 @@ static void uhci_fill_queue(UHCIState *s, UHCI_TD *td, struct USBEndpoint *ep)
     int ret;
 
     while (is_valid(plink)) {
-        pci_dma_read(&s->dev, plink & ~0xf, &ptd, sizeof(ptd));
-        le32_to_cpus(&ptd.link);
-        le32_to_cpus(&ptd.ctrl);
-        le32_to_cpus(&ptd.token);
-        le32_to_cpus(&ptd.buffer);
+        uhci_read_td(s, &ptd, plink);
         if (!(ptd.ctrl & TD_CTRL_ACTIVE)) {
             break;
         }
@@ -1031,11 +1036,7 @@ static void uhci_process_frame(UHCIState *s)
         }
 
         /* TD */
-        pci_dma_read(&s->dev, link & ~0xf, &td, sizeof(td));
-        le32_to_cpus(&td.link);
-        le32_to_cpus(&td.ctrl);
-        le32_to_cpus(&td.token);
-        le32_to_cpus(&td.buffer);
+        uhci_read_td(s, &td, link);
         trace_usb_uhci_td_load(curr_qh & ~0xf, link & ~0xf, td.ctrl, td.token);
 
         old_td_ctrl = td.ctrl;
commit 1f250cc77224f9b950c867e47d9f47e4cd46d7d4
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:31:10 2012 +0200

    uhci: Rename UHCIAsync->td to UHCIAsync->td_addr
    
    We use the name td both to refer to a UHCI_TD read from guest memory as
    well as to refer to the guest address where a td is stored, switch over
    to always use td_addr in the second case for consistency.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 0335f3c..7589a5b 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -100,7 +100,7 @@ struct UHCIAsync {
     QEMUSGList sgl;
     UHCIQueue *queue;
     QTAILQ_ENTRY(UHCIAsync) next;
-    uint32_t  td;
+    uint32_t  td_addr;
     uint8_t   done;
 };
 
@@ -203,22 +203,22 @@ static void uhci_queue_free(UHCIQueue *queue)
     g_free(queue);
 }
 
-static UHCIAsync *uhci_async_alloc(UHCIQueue *queue, uint32_t addr)
+static UHCIAsync *uhci_async_alloc(UHCIQueue *queue, uint32_t td_addr)
 {
     UHCIAsync *async = g_new0(UHCIAsync, 1);
 
     async->queue = queue;
-    async->td = addr;
+    async->td_addr = td_addr;
     usb_packet_init(&async->packet);
     pci_dma_sglist_init(&async->sgl, &queue->uhci->dev, 1);
-    trace_usb_uhci_packet_add(async->queue->token, async->td);
+    trace_usb_uhci_packet_add(async->queue->token, async->td_addr);
 
     return async;
 }
 
 static void uhci_async_free(UHCIAsync *async)
 {
-    trace_usb_uhci_packet_del(async->queue->token, async->td);
+    trace_usb_uhci_packet_del(async->queue->token, async->td_addr);
     usb_packet_cleanup(&async->packet);
     qemu_sglist_destroy(&async->sgl);
     g_free(async);
@@ -228,20 +228,21 @@ static void uhci_async_link(UHCIAsync *async)
 {
     UHCIQueue *queue = async->queue;
     QTAILQ_INSERT_TAIL(&queue->asyncs, async, next);
-    trace_usb_uhci_packet_link_async(async->queue->token, async->td);
+    trace_usb_uhci_packet_link_async(async->queue->token, async->td_addr);
 }
 
 static void uhci_async_unlink(UHCIAsync *async)
 {
     UHCIQueue *queue = async->queue;
     QTAILQ_REMOVE(&queue->asyncs, async, next);
-    trace_usb_uhci_packet_unlink_async(async->queue->token, async->td);
+    trace_usb_uhci_packet_unlink_async(async->queue->token, async->td_addr);
 }
 
 static void uhci_async_cancel(UHCIAsync *async)
 {
     uhci_async_unlink(async);
-    trace_usb_uhci_packet_cancel(async->queue->token, async->td, async->done);
+    trace_usb_uhci_packet_cancel(async->queue->token, async->td_addr,
+                                 async->done);
     if (!async->done)
         usb_cancel_packet(&async->packet);
     usb_packet_unmap(&async->packet, &async->sgl);
@@ -300,7 +301,8 @@ static void uhci_async_cancel_all(UHCIState *s)
     }
 }
 
-static UHCIAsync *uhci_async_find_td(UHCIState *s, uint32_t addr, UHCI_TD *td)
+static UHCIAsync *uhci_async_find_td(UHCIState *s, uint32_t td_addr,
+                                     UHCI_TD *td)
 {
     uint32_t token = uhci_queue_token(td);
     UHCIQueue *queue;
@@ -316,7 +318,7 @@ static UHCIAsync *uhci_async_find_td(UHCIState *s, uint32_t addr, UHCI_TD *td)
     }
 
     QTAILQ_FOREACH(async, &queue->asyncs, next) {
-        if (async->td == addr) {
+        if (async->td_addr == td_addr) {
             return async;
         }
     }
@@ -722,13 +724,14 @@ static int uhci_complete_td(UHCIState *s, UHCI_TD *td, UHCIAsync *async, uint32_
             *int_mask |= 0x02;
             /* short packet: do not update QH */
             trace_usb_uhci_packet_complete_shortxfer(async->queue->token,
-                                                    async->td);
+                                                     async->td_addr);
             return TD_RESULT_NEXT_QH;
         }
     }
 
     /* success */
-    trace_usb_uhci_packet_complete_success(async->queue->token, async->td);
+    trace_usb_uhci_packet_complete_success(async->queue->token,
+                                           async->td_addr);
     return TD_RESULT_COMPLETE;
 
 out:
@@ -739,14 +742,16 @@ out:
 
     case USB_RET_STALL:
         td->ctrl |= TD_CTRL_STALL;
-        trace_usb_uhci_packet_complete_stall(async->queue->token, async->td);
+        trace_usb_uhci_packet_complete_stall(async->queue->token,
+                                             async->td_addr);
         err = TD_RESULT_NEXT_QH;
         break;
 
     case USB_RET_BABBLE:
         td->ctrl |= TD_CTRL_BABBLE | TD_CTRL_STALL;
         /* frame interrupted */
-        trace_usb_uhci_packet_complete_babble(async->queue->token, async->td);
+        trace_usb_uhci_packet_complete_babble(async->queue->token,
+                                              async->td_addr);
         err = TD_RESULT_STOP_FRAME;
         break;
 
@@ -755,7 +760,8 @@ out:
     default:
         td->ctrl |= TD_CTRL_TIMEOUT;
         td->ctrl &= ~(3 << TD_CTRL_ERROR_SHIFT);
-        trace_usb_uhci_packet_complete_error(async->queue->token, async->td);
+        trace_usb_uhci_packet_complete_error(async->queue->token,
+                                             async->td_addr);
         err = TD_RESULT_NEXT_QH;
         break;
     }
@@ -769,7 +775,7 @@ out:
     return err;
 }
 
-static int uhci_handle_td(UHCIState *s, uint32_t addr, UHCI_TD *td,
+static int uhci_handle_td(UHCIState *s, uint32_t td_addr, UHCI_TD *td,
                           uint32_t *int_mask, bool queuing,
                           struct USBEndpoint **ep_ret)
 {
@@ -792,7 +798,7 @@ static int uhci_handle_td(UHCIState *s, uint32_t addr, UHCI_TD *td,
         return TD_RESULT_NEXT_QH;
     }
 
-    async = uhci_async_find_td(s, addr, td);
+    async = uhci_async_find_td(s, td_addr, td);
     if (async) {
         /* Already submitted */
         async->queue->valid = 32;
@@ -811,7 +817,7 @@ static int uhci_handle_td(UHCIState *s, uint32_t addr, UHCI_TD *td,
     }
 
     /* Allocate new packet */
-    async = uhci_async_alloc(uhci_queue_get(s, td), addr);
+    async = uhci_async_alloc(uhci_queue_get(s, td), td_addr);
 
     /* valid needs to be large enough to handle 10 frame delay
      * for initial isochronous requests
@@ -827,7 +833,7 @@ static int uhci_handle_td(UHCIState *s, uint32_t addr, UHCI_TD *td,
     if (ep_ret) {
         *ep_ret = ep;
     }
-    usb_packet_setup(&async->packet, pid, ep, addr, spd,
+    usb_packet_setup(&async->packet, pid, ep, td_addr, spd,
                      (td->ctrl & TD_CTRL_IOC) != 0);
     qemu_sglist_add(&async->sgl, td->buffer, max_len);
     usb_packet_map(&async->packet, &async->sgl);
commit 40507377261250fe19e441300788c77b748688cb
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:31:09 2012 +0200

    uhci: Move emptying of the queue's asyncs' queue to uhci_queue_free
    
    Cleanup: all callers of uhci_queue_free first unconditionally cancel
    all remaining asyncs in the queue, so lets move this to uhci_queue_free().
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 7dfedef..0335f3c 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -160,6 +160,8 @@ typedef struct UHCI_QH {
     uint32_t el_link;
 } UHCI_QH;
 
+static void uhci_async_cancel(UHCIAsync *async);
+
 static inline int32_t uhci_queue_token(UHCI_TD *td)
 {
     /* covers ep, dev, pid -> identifies the endpoint */
@@ -189,6 +191,12 @@ static UHCIQueue *uhci_queue_get(UHCIState *s, UHCI_TD *td)
 static void uhci_queue_free(UHCIQueue *queue)
 {
     UHCIState *s = queue->uhci;
+    UHCIAsync *async;
+
+    while (!QTAILQ_EMPTY(&queue->asyncs)) {
+        async = QTAILQ_FIRST(&queue->asyncs);
+        uhci_async_cancel(async);
+    }
 
     trace_usb_uhci_queue_del(queue->token);
     QTAILQ_REMOVE(&s->queues, queue, next);
@@ -259,17 +267,11 @@ static void uhci_async_validate_begin(UHCIState *s)
 static void uhci_async_validate_end(UHCIState *s)
 {
     UHCIQueue *queue, *n;
-    UHCIAsync *async;
 
     QTAILQ_FOREACH_SAFE(queue, &s->queues, next, n) {
-        if (queue->valid > 0) {
-            continue;
+        if (!queue->valid) {
+            uhci_queue_free(queue);
         }
-        while (!QTAILQ_EMPTY(&queue->asyncs)) {
-            async = QTAILQ_FIRST(&queue->asyncs);
-            uhci_async_cancel(async);
-        }
-        uhci_queue_free(queue);
     }
 }
 
@@ -292,12 +294,8 @@ static void uhci_async_cancel_device(UHCIState *s, USBDevice *dev)
 static void uhci_async_cancel_all(UHCIState *s)
 {
     UHCIQueue *queue, *nq;
-    UHCIAsync *curr, *n;
 
     QTAILQ_FOREACH_SAFE(queue, &s->queues, next, nq) {
-        QTAILQ_FOREACH_SAFE(curr, &queue->asyncs, next, n) {
-            uhci_async_cancel(curr);
-        }
         uhci_queue_free(queue);
     }
 }
commit 3c87c76d1a0e9ce1dde6f919e3b1b9bcabb69985
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:31:08 2012 +0200

    uhci: Drop unnecessary forward declaration of some static functions
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index eecd291..7dfedef 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -693,9 +693,6 @@ static USBDevice *uhci_find_device(UHCIState *s, uint8_t addr)
     return NULL;
 }
 
-static void uhci_async_complete(USBPort *port, USBPacket *packet);
-static void uhci_process_frame(UHCIState *s);
-
 static int uhci_complete_td(UHCIState *s, UHCI_TD *td, UHCIAsync *async, uint32_t *int_mask)
 {
     int len = 0, max_len, err, ret;
commit a89e255b0cbe7c5b56348201e1c0f6d427c98fc8
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:31:07 2012 +0200

    uhci: Don't retry on error
    
    Since we are either dealing with emulated devices, where retrying is
    not going to help, or with redirected devices where the host OS will
    have already retried, don't bother retrying on failed transfers.
    
    Also move some common/indentical code out of all the error cases
    into the generic error path.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 82dd5c2..eecd291 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -696,10 +696,6 @@ static USBDevice *uhci_find_device(UHCIState *s, uint8_t addr)
 static void uhci_async_complete(USBPort *port, USBPacket *packet);
 static void uhci_process_frame(UHCIState *s);
 
-/* return -1 if fatal error (frame must be stopped)
-          0 if TD successful
-          1 if TD unsuccessful or inactive
-*/
 static int uhci_complete_td(UHCIState *s, UHCI_TD *td, UHCIAsync *async, uint32_t *int_mask)
 {
     int len = 0, max_len, err, ret;
@@ -742,60 +738,40 @@ static int uhci_complete_td(UHCIState *s, UHCI_TD *td, UHCIAsync *async, uint32_
 
 out:
     switch(ret) {
+    case USB_RET_NAK:
+        td->ctrl |= TD_CTRL_NAK;
+        return TD_RESULT_NEXT_QH;
+
     case USB_RET_STALL:
         td->ctrl |= TD_CTRL_STALL;
-        td->ctrl &= ~TD_CTRL_ACTIVE;
-        s->status |= UHCI_STS_USBERR;
-        if (td->ctrl & TD_CTRL_IOC) {
-            *int_mask |= 0x01;
-        }
-        uhci_update_irq(s);
         trace_usb_uhci_packet_complete_stall(async->queue->token, async->td);
-        return TD_RESULT_NEXT_QH;
+        err = TD_RESULT_NEXT_QH;
+        break;
 
     case USB_RET_BABBLE:
         td->ctrl |= TD_CTRL_BABBLE | TD_CTRL_STALL;
-        td->ctrl &= ~TD_CTRL_ACTIVE;
-        s->status |= UHCI_STS_USBERR;
-        if (td->ctrl & TD_CTRL_IOC) {
-            *int_mask |= 0x01;
-        }
-        uhci_update_irq(s);
         /* frame interrupted */
         trace_usb_uhci_packet_complete_babble(async->queue->token, async->td);
-        return TD_RESULT_STOP_FRAME;
-
-    case USB_RET_NAK:
-        td->ctrl |= TD_CTRL_NAK;
-        if (pid == USB_TOKEN_SETUP)
-            break;
-        return TD_RESULT_NEXT_QH;
+        err = TD_RESULT_STOP_FRAME;
+        break;
 
     case USB_RET_IOERROR:
     case USB_RET_NODEV:
     default:
-	break;
+        td->ctrl |= TD_CTRL_TIMEOUT;
+        td->ctrl &= ~(3 << TD_CTRL_ERROR_SHIFT);
+        trace_usb_uhci_packet_complete_error(async->queue->token, async->td);
+        err = TD_RESULT_NEXT_QH;
+        break;
     }
 
-    /* Retry the TD if error count is not zero */
-
-    td->ctrl |= TD_CTRL_TIMEOUT;
-    err = (td->ctrl >> TD_CTRL_ERROR_SHIFT) & 3;
-    if (err != 0) {
-        err--;
-        if (err == 0) {
-            td->ctrl &= ~TD_CTRL_ACTIVE;
-            s->status |= UHCI_STS_USBERR;
-            if (td->ctrl & TD_CTRL_IOC)
-                *int_mask |= 0x01;
-            uhci_update_irq(s);
-            trace_usb_uhci_packet_complete_error(async->queue->token,
-                                                 async->td);
-        }
+    td->ctrl &= ~TD_CTRL_ACTIVE;
+    s->status |= UHCI_STS_USBERR;
+    if (td->ctrl & TD_CTRL_IOC) {
+        *int_mask |= 0x01;
     }
-    td->ctrl = (td->ctrl & ~(3 << TD_CTRL_ERROR_SHIFT)) |
-        (err << TD_CTRL_ERROR_SHIFT);
-    return TD_RESULT_NEXT_QH;
+    uhci_update_irq(s);
+    return err;
 }
 
 static int uhci_handle_td(UHCIState *s, uint32_t addr, UHCI_TD *td,
commit 2f2ee2689ffe8a784a86bf10fb946772748632c7
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:31:06 2012 +0200

    uhci: cleanup: Add an unlink call to uhci_async_cancel()
    
    All callers of uhci_async_cancel() call uhci_async_unlink() first, so
    lets move the unlink call to uhci_async_cancel()
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 129792b..82dd5c2 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -232,6 +232,7 @@ static void uhci_async_unlink(UHCIAsync *async)
 
 static void uhci_async_cancel(UHCIAsync *async)
 {
+    uhci_async_unlink(async);
     trace_usb_uhci_packet_cancel(async->queue->token, async->td, async->done);
     if (!async->done)
         usb_cancel_packet(&async->packet);
@@ -266,7 +267,6 @@ static void uhci_async_validate_end(UHCIState *s)
         }
         while (!QTAILQ_EMPTY(&queue->asyncs)) {
             async = QTAILQ_FIRST(&queue->asyncs);
-            uhci_async_unlink(async);
             uhci_async_cancel(async);
         }
         uhci_queue_free(queue);
@@ -284,7 +284,6 @@ static void uhci_async_cancel_device(UHCIState *s, USBDevice *dev)
                 curr->packet.ep->dev != dev) {
                 continue;
             }
-            uhci_async_unlink(curr);
             uhci_async_cancel(curr);
         }
     }
@@ -297,7 +296,6 @@ static void uhci_async_cancel_all(UHCIState *s)
 
     QTAILQ_FOREACH_SAFE(queue, &s->queues, next, nq) {
         QTAILQ_FOREACH_SAFE(curr, &queue->asyncs, next, n) {
-            uhci_async_unlink(curr);
             uhci_async_cancel(curr);
         }
         uhci_queue_free(queue);
commit 5b352ed537e31a6b5f9e33e26429ba8b909af234
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:31:05 2012 +0200

    uhci: No need to handle async completion of isoc packets
    
    No devices ever return async for isoc endpoints and the core
    already enforces this.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 4a1ea6b..129792b 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -101,7 +101,6 @@ struct UHCIAsync {
     UHCIQueue *queue;
     QTAILQ_ENTRY(UHCIAsync) next;
     uint32_t  td;
-    uint8_t   isoc;
     uint8_t   done;
 };
 
@@ -849,7 +848,6 @@ static int uhci_handle_td(UHCIState *s, uint32_t addr, UHCI_TD *td,
      * for initial isochronous requests
      */
     async->queue->valid = 32;
-    async->isoc = td->ctrl & TD_CTRL_IOS;
 
     max_len = ((td->token >> 21) + 1) & 0x7ff;
     pid = td->token & 0xff;
@@ -911,30 +909,9 @@ static void uhci_async_complete(USBPort *port, USBPacket *packet)
         return;
     }
 
-    if (async->isoc) {
-        UHCI_TD td;
-        uint32_t link = async->td;
-        uint32_t int_mask = 0, val;
-
-        pci_dma_read(&s->dev, link & ~0xf, &td, sizeof(td));
-        le32_to_cpus(&td.link);
-        le32_to_cpus(&td.ctrl);
-        le32_to_cpus(&td.token);
-        le32_to_cpus(&td.buffer);
-
-        uhci_async_unlink(async);
-        uhci_complete_td(s, &td, async, &int_mask);
-        s->pending_int_mask |= int_mask;
-
-        /* update the status bits of the TD */
-        val = cpu_to_le32(td.ctrl);
-        pci_dma_write(&s->dev, (link & ~0xf) + 4, &val, sizeof(val));
-        uhci_async_free(async);
-    } else {
-        async->done = 1;
-        if (s->frame_bytes < s->frame_bandwidth) {
-            qemu_bh_schedule(s->bh);
-        }
+    async->done = 1;
+    if (s->frame_bytes < s->frame_bandwidth) {
+        qemu_bh_schedule(s->bh);
     }
 }
 
commit aaac74343d761947e57a1b0cbaa7f2dd0c6a40ca
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:31:04 2012 +0200

    usb: Enforce iso endpoints never returing USB_RET_ASYNC
    
    ehci was already testing for this, and we depend in various places
    on no devices doing this, so lets move the check for this to the
    usb core.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/core.c b/hw/usb/core.c
index 87a513f..632a8ef 100644
--- a/hw/usb/core.c
+++ b/hw/usb/core.c
@@ -391,6 +391,7 @@ int usb_handle_packet(USBDevice *dev, USBPacket *p)
     if (QTAILQ_EMPTY(&p->ep->queue) || p->ep->pipeline) {
         ret = usb_process_one(p);
         if (ret == USB_RET_ASYNC) {
+            assert(p->ep->type != USB_ENDPOINT_XFER_ISOC);
             usb_packet_set_state(p, USB_PACKET_ASYNC);
             QTAILQ_INSERT_TAIL(&p->ep->queue, p, queue);
         } else if (ret == USB_RET_ADD_TO_QUEUE) {
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 8ac78ad..f14f9d7 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -1667,7 +1667,6 @@ static int ehci_process_itd(EHCIState *ehci,
                                  (itd->transact[i] & ITD_XACT_IOC) != 0);
                 usb_packet_map(&ehci->ipacket, &ehci->isgl);
                 ret = usb_handle_packet(dev, &ehci->ipacket);
-                assert(ret != USB_RET_ASYNC);
                 usb_packet_unmap(&ehci->ipacket, &ehci->isgl);
             } else {
                 DPRINTF("ISOCH: attempt to addess non-iso endpoint\n");
commit a6fb2ddb1417fcc2d24f3231c84035fcbd90123a
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:14:10 2012 +0200

    usb: Add an int_req flag to USBPacket
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb.h b/hw/usb.h
index 1fcf79c..3a6cc84 100644
--- a/hw/usb.h
+++ b/hw/usb.h
@@ -352,6 +352,7 @@ struct USBPacket {
     QEMUIOVector iov;
     uint64_t parameter; /* control transfers */
     bool short_not_ok;
+    bool int_req;
     int result; /* transfer length or USB_RET_* status code */
     /* Internal use by the USB layer.  */
     USBPacketState state;
@@ -362,7 +363,7 @@ void usb_packet_init(USBPacket *p);
 void usb_packet_set_state(USBPacket *p, USBPacketState state);
 void usb_packet_check_state(USBPacket *p, USBPacketState expected);
 void usb_packet_setup(USBPacket *p, int pid, USBEndpoint *ep, uint64_t id,
-                      bool short_not_ok);
+                      bool short_not_ok, bool int_req);
 void usb_packet_addbuf(USBPacket *p, void *ptr, size_t len);
 int usb_packet_map(USBPacket *p, QEMUSGList *sgl);
 void usb_packet_unmap(USBPacket *p, QEMUSGList *sgl);
diff --git a/hw/usb/core.c b/hw/usb/core.c
index f4a5ad2..87a513f 100644
--- a/hw/usb/core.c
+++ b/hw/usb/core.c
@@ -533,7 +533,7 @@ void usb_packet_set_state(USBPacket *p, USBPacketState state)
 }
 
 void usb_packet_setup(USBPacket *p, int pid, USBEndpoint *ep, uint64_t id,
-                      bool short_not_ok)
+                      bool short_not_ok, bool int_req)
 {
     assert(!usb_packet_is_inflight(p));
     assert(p->iov.iov != NULL);
@@ -543,6 +543,7 @@ void usb_packet_setup(USBPacket *p, int pid, USBEndpoint *ep, uint64_t id,
     p->result = 0;
     p->parameter = 0;
     p->short_not_ok = short_not_ok;
+    p->int_req = int_req;
     qemu_iovec_reset(&p->iov);
     usb_packet_set_state(p, USB_PACKET_SETUP);
 }
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 8e8ec6b..8ac78ad 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -1592,7 +1592,8 @@ static int ehci_execute(EHCIPacket *p, const char *action)
         }
 
         spd = (p->pid == USB_TOKEN_IN && NLPTR_TBIT(p->qtd.altnext) == 0);
-        usb_packet_setup(&p->packet, p->pid, ep, p->qtdaddr, spd);
+        usb_packet_setup(&p->packet, p->pid, ep, p->qtdaddr, spd,
+                         (p->qtd.token & QTD_TOKEN_IOC) != 0);
         usb_packet_map(&p->packet, &p->sgl);
         p->async = EHCI_ASYNC_INITIALIZED;
     }
@@ -1662,7 +1663,8 @@ static int ehci_process_itd(EHCIState *ehci,
             dev = ehci_find_device(ehci, devaddr);
             ep = usb_ep_get(dev, pid, endp);
             if (ep && ep->type == USB_ENDPOINT_XFER_ISOC) {
-                usb_packet_setup(&ehci->ipacket, pid, ep, addr, false);
+                usb_packet_setup(&ehci->ipacket, pid, ep, addr, false,
+                                 (itd->transact[i] & ITD_XACT_IOC) != 0);
                 usb_packet_map(&ehci->ipacket, &ehci->isgl);
                 ret = usb_handle_packet(dev, &ehci->ipacket);
                 assert(ret != USB_RET_ASYNC);
diff --git a/hw/usb/hcd-musb.c b/hw/usb/hcd-musb.c
index c4309a4..4f55390 100644
--- a/hw/usb/hcd-musb.c
+++ b/hw/usb/hcd-musb.c
@@ -627,7 +627,7 @@ static void musb_packet(MUSBState *s, MUSBEndPoint *ep,
     dev = usb_find_device(&s->port, ep->faddr[idx]);
     uep = usb_ep_get(dev, pid, ep->type[idx] & 0xf);
     usb_packet_setup(&ep->packey[dir].p, pid, uep,
-                     (dev->addr << 16) | (uep->nr << 8) | pid, false);
+                     (dev->addr << 16) | (uep->nr << 8) | pid, false, true);
     usb_packet_addbuf(&ep->packey[dir].p, ep->buf[idx], len);
     ep->packey[dir].ep = ep;
     ep->packey[dir].dir = dir;
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index 00e2e1a..7571e9e 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -810,9 +810,11 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed,
     if (completion) {
         ret = ohci->usb_packet.result;
     } else {
+        bool int_req = relative_frame_number == frame_count &&
+                       OHCI_BM(iso_td.flags, TD_DI) == 0;
         dev = ohci_find_device(ohci, OHCI_BM(ed->flags, ED_FA));
         ep = usb_ep_get(dev, pid, OHCI_BM(ed->flags, ED_EN));
-        usb_packet_setup(&ohci->usb_packet, pid, ep, addr, false);
+        usb_packet_setup(&ohci->usb_packet, pid, ep, addr, false, int_req);
         usb_packet_addbuf(&ohci->usb_packet, ohci->usb_buf, len);
         ret = usb_handle_packet(dev, &ohci->usb_packet);
         if (ret == USB_RET_ASYNC) {
@@ -1012,7 +1014,8 @@ static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed)
         }
         dev = ohci_find_device(ohci, OHCI_BM(ed->flags, ED_FA));
         ep = usb_ep_get(dev, pid, OHCI_BM(ed->flags, ED_EN));
-        usb_packet_setup(&ohci->usb_packet, pid, ep, addr, !flag_r);
+        usb_packet_setup(&ohci->usb_packet, pid, ep, addr, !flag_r,
+                         OHCI_BM(td.flags, TD_DI) == 0);
         usb_packet_addbuf(&ohci->usb_packet, ohci->usb_buf, pktlen);
         ret = usb_handle_packet(dev, &ohci->usb_packet);
 #ifdef DEBUG_PACKET
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 953897b..4a1ea6b 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -860,7 +860,8 @@ static int uhci_handle_td(UHCIState *s, uint32_t addr, UHCI_TD *td,
     if (ep_ret) {
         *ep_ret = ep;
     }
-    usb_packet_setup(&async->packet, pid, ep, addr, spd);
+    usb_packet_setup(&async->packet, pid, ep, addr, spd,
+                     (td->ctrl & TD_CTRL_IOC) != 0);
     qemu_sglist_add(&async->sgl, td->buffer, max_len);
     usb_packet_map(&async->packet, &async->sgl);
 
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index ebfc5b8..caa5f3e 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -322,6 +322,7 @@ typedef struct XHCITransfer {
     bool running_retry;
     bool cancelled;
     bool complete;
+    bool int_req;
     unsigned int iso_pkts;
     unsigned int slotid;
     unsigned int epid;
@@ -1292,18 +1293,22 @@ static TRBCCode xhci_set_ep_dequeue(XHCIState *xhci, unsigned int slotid,
     return CC_SUCCESS;
 }
 
-static int xhci_xfer_map(XHCITransfer *xfer)
+static int xhci_xfer_create_sgl(XHCITransfer *xfer, int in_xfer)
 {
-    int in_xfer = (xfer->packet.pid == USB_TOKEN_IN);
     XHCIState *xhci = xfer->xhci;
     int i;
 
+    xfer->int_req = false;
     pci_dma_sglist_init(&xfer->sgl, &xhci->pci_dev, xfer->trb_count);
     for (i = 0; i < xfer->trb_count; i++) {
         XHCITRB *trb = &xfer->trbs[i];
         dma_addr_t addr;
         unsigned int chunk = 0;
 
+        if (trb->control & TRB_TR_IOC) {
+            xfer->int_req = true;
+        }
+
         switch (TRB_TYPE(*trb)) {
         case TR_DATA:
             if ((!(trb->control & TRB_TR_DIR)) != (!in_xfer)) {
@@ -1328,7 +1333,6 @@ static int xhci_xfer_map(XHCITransfer *xfer)
         }
     }
 
-    usb_packet_map(&xfer->packet, &xfer->sgl);
     return 0;
 
 err:
@@ -1446,8 +1450,10 @@ static int xhci_setup_packet(XHCITransfer *xfer)
         ep = usb_ep_get(dev, dir, xfer->epid >> 1);
     }
 
-    usb_packet_setup(&xfer->packet, dir, ep, xfer->trbs[0].addr, false);
-    xhci_xfer_map(xfer);
+    xhci_xfer_create_sgl(xfer, dir == USB_TOKEN_IN); /* Also sets int_req */
+    usb_packet_setup(&xfer->packet, dir, ep, xfer->trbs[0].addr, false,
+                     xfer->int_req);
+    usb_packet_map(&xfer->packet, &xfer->sgl);
     DPRINTF("xhci: setup packet pid 0x%x addr %d ep %d\n",
             xfer->packet.pid, dev->addr, ep->nr);
     return 0;
commit 6ba43f1f6b60159e731b1f37ffb53ab9ab59efa9
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:14:09 2012 +0200

    usb: Move short-not-ok handling to the core
    
    After a short-not-ok packet ending short, we should not advance the queue.
    Move enforcing this to the core, rather then handling it in the hcd code.
    
    This may result in the queue now actually containing multiple input packets
    (which would not happen before), and this requires special handling in
    combination with pipelining, so disable pipleining for input endpoints
    (for now).
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb.h b/hw/usb.h
index ead03c9..1fcf79c 100644
--- a/hw/usb.h
+++ b/hw/usb.h
@@ -351,6 +351,7 @@ struct USBPacket {
     USBEndpoint *ep;
     QEMUIOVector iov;
     uint64_t parameter; /* control transfers */
+    bool short_not_ok;
     int result; /* transfer length or USB_RET_* status code */
     /* Internal use by the USB layer.  */
     USBPacketState state;
@@ -360,7 +361,8 @@ struct USBPacket {
 void usb_packet_init(USBPacket *p);
 void usb_packet_set_state(USBPacket *p, USBPacketState state);
 void usb_packet_check_state(USBPacket *p, USBPacketState expected);
-void usb_packet_setup(USBPacket *p, int pid, USBEndpoint *ep, uint64_t id);
+void usb_packet_setup(USBPacket *p, int pid, USBEndpoint *ep, uint64_t id,
+                      bool short_not_ok);
 void usb_packet_addbuf(USBPacket *p, void *ptr, size_t len);
 int usb_packet_map(USBPacket *p, QEMUSGList *sgl);
 void usb_packet_unmap(USBPacket *p, QEMUSGList *sgl);
diff --git a/hw/usb/core.c b/hw/usb/core.c
index 5a97a0e..f4a5ad2 100644
--- a/hw/usb/core.c
+++ b/hw/usb/core.c
@@ -423,7 +423,7 @@ void usb_packet_complete_one(USBDevice *dev, USBPacket *p)
     assert(QTAILQ_FIRST(&ep->queue) == p);
     assert(p->result != USB_RET_ASYNC && p->result != USB_RET_NAK);
 
-    if (p->result < 0) {
+    if (p->result < 0 || (p->short_not_ok && (p->result < p->iov.size))) {
         ep->halted = true;
     }
     usb_packet_set_state(p, USB_PACKET_COMPLETE);
@@ -532,7 +532,8 @@ void usb_packet_set_state(USBPacket *p, USBPacketState state)
     p->state = state;
 }
 
-void usb_packet_setup(USBPacket *p, int pid, USBEndpoint *ep, uint64_t id)
+void usb_packet_setup(USBPacket *p, int pid, USBEndpoint *ep, uint64_t id,
+                      bool short_not_ok)
 {
     assert(!usb_packet_is_inflight(p));
     assert(p->iov.iov != NULL);
@@ -541,6 +542,7 @@ void usb_packet_setup(USBPacket *p, int pid, USBEndpoint *ep, uint64_t id)
     p->ep = ep;
     p->result = 0;
     p->parameter = 0;
+    p->short_not_ok = short_not_ok;
     qemu_iovec_reset(&p->iov);
     usb_packet_set_state(p, USB_PACKET_SETUP);
 }
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 74a2587..8e8ec6b 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -1551,6 +1551,7 @@ static int ehci_execute(EHCIPacket *p, const char *action)
     USBEndpoint *ep;
     int ret;
     int endp;
+    bool spd;
 
     assert(p->async == EHCI_ASYNC_NONE ||
            p->async == EHCI_ASYNC_INITIALIZED);
@@ -1590,7 +1591,8 @@ static int ehci_execute(EHCIPacket *p, const char *action)
             return USB_RET_PROCERR;
         }
 
-        usb_packet_setup(&p->packet, p->pid, ep, p->qtdaddr);
+        spd = (p->pid == USB_TOKEN_IN && NLPTR_TBIT(p->qtd.altnext) == 0);
+        usb_packet_setup(&p->packet, p->pid, ep, p->qtdaddr, spd);
         usb_packet_map(&p->packet, &p->sgl);
         p->async = EHCI_ASYNC_INITIALIZED;
     }
@@ -1660,7 +1662,7 @@ static int ehci_process_itd(EHCIState *ehci,
             dev = ehci_find_device(ehci, devaddr);
             ep = usb_ep_get(dev, pid, endp);
             if (ep && ep->type == USB_ENDPOINT_XFER_ISOC) {
-                usb_packet_setup(&ehci->ipacket, pid, ep, addr);
+                usb_packet_setup(&ehci->ipacket, pid, ep, addr, false);
                 usb_packet_map(&ehci->ipacket, &ehci->isgl);
                 ret = usb_handle_packet(dev, &ehci->ipacket);
                 assert(ret != USB_RET_ASYNC);
@@ -2085,9 +2087,6 @@ static int ehci_fill_queue(EHCIPacket *p)
     uint32_t qtdaddr, start_addr = p->qtdaddr;
 
     for (;;) {
-        if (NLPTR_TBIT(qtd.altnext) == 0) {
-            break;
-        }
         if (NLPTR_TBIT(qtd.next) != 0) {
             break;
         }
diff --git a/hw/usb/hcd-musb.c b/hw/usb/hcd-musb.c
index 212dd12..c4309a4 100644
--- a/hw/usb/hcd-musb.c
+++ b/hw/usb/hcd-musb.c
@@ -627,7 +627,7 @@ static void musb_packet(MUSBState *s, MUSBEndPoint *ep,
     dev = usb_find_device(&s->port, ep->faddr[idx]);
     uep = usb_ep_get(dev, pid, ep->type[idx] & 0xf);
     usb_packet_setup(&ep->packey[dir].p, pid, uep,
-                     (dev->addr << 16) | (uep->nr << 8) | pid);
+                     (dev->addr << 16) | (uep->nr << 8) | pid, false);
     usb_packet_addbuf(&ep->packey[dir].p, ep->buf[idx], len);
     ep->packey[dir].ep = ep;
     ep->packey[dir].dir = dir;
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index 31dcfbb..00e2e1a 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -812,7 +812,7 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed,
     } else {
         dev = ohci_find_device(ohci, OHCI_BM(ed->flags, ED_FA));
         ep = usb_ep_get(dev, pid, OHCI_BM(ed->flags, ED_EN));
-        usb_packet_setup(&ohci->usb_packet, pid, ep, addr);
+        usb_packet_setup(&ohci->usb_packet, pid, ep, addr, false);
         usb_packet_addbuf(&ohci->usb_packet, ohci->usb_buf, len);
         ret = usb_handle_packet(dev, &ohci->usb_packet);
         if (ret == USB_RET_ASYNC) {
@@ -1012,7 +1012,7 @@ static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed)
         }
         dev = ohci_find_device(ohci, OHCI_BM(ed->flags, ED_FA));
         ep = usb_ep_get(dev, pid, OHCI_BM(ed->flags, ED_EN));
-        usb_packet_setup(&ohci->usb_packet, pid, ep, addr);
+        usb_packet_setup(&ohci->usb_packet, pid, ep, addr, !flag_r);
         usb_packet_addbuf(&ohci->usb_packet, ohci->usb_buf, pktlen);
         ret = usb_handle_packet(dev, &ohci->usb_packet);
 #ifdef DEBUG_PACKET
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 00dc9d5..953897b 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -808,6 +808,7 @@ static int uhci_handle_td(UHCIState *s, uint32_t addr, UHCI_TD *td,
     UHCIAsync *async;
     int len = 0, max_len;
     uint8_t pid;
+    bool spd;
     USBDevice *dev;
     USBEndpoint *ep;
 
@@ -852,13 +853,14 @@ static int uhci_handle_td(UHCIState *s, uint32_t addr, UHCI_TD *td,
 
     max_len = ((td->token >> 21) + 1) & 0x7ff;
     pid = td->token & 0xff;
+    spd = (pid == USB_TOKEN_IN && (td->ctrl & TD_CTRL_SPD) != 0);
 
     dev = uhci_find_device(s, (td->token >> 8) & 0x7f);
     ep = usb_ep_get(dev, pid, (td->token >> 15) & 0xf);
     if (ep_ret) {
         *ep_ret = ep;
     }
-    usb_packet_setup(&async->packet, pid, ep, addr);
+    usb_packet_setup(&async->packet, pid, ep, addr, spd);
     qemu_sglist_add(&async->sgl, td->buffer, max_len);
     usb_packet_map(&async->packet, &async->sgl);
 
@@ -985,8 +987,7 @@ static void uhci_fill_queue(UHCIState *s, UHCI_TD *td, struct USBEndpoint *ep)
     UHCI_TD ptd;
     int ret;
 
-    ptd.ctrl = td->ctrl;
-    while (is_valid(plink) && !(ptd.ctrl & TD_CTRL_SPD)) {
+    while (is_valid(plink)) {
         pci_dma_read(&s->dev, plink & ~0xf, &ptd, sizeof(ptd));
         le32_to_cpus(&ptd.link);
         le32_to_cpus(&ptd.ctrl);
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 1f437cc..ebfc5b8 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -1446,7 +1446,7 @@ static int xhci_setup_packet(XHCITransfer *xfer)
         ep = usb_ep_get(dev, dir, xfer->epid >> 1);
     }
 
-    usb_packet_setup(&xfer->packet, dir, ep, xfer->trbs[0].addr);
+    usb_packet_setup(&xfer->packet, dir, ep, xfer->trbs[0].addr, false);
     xhci_xfer_map(xfer);
     DPRINTF("xhci: setup packet pid 0x%x addr %d ep %d\n",
             xfer->packet.pid, dev->addr, ep->nr);
diff --git a/hw/usb/host-linux.c b/hw/usb/host-linux.c
index 44f1a64..3a258b4 100644
--- a/hw/usb/host-linux.c
+++ b/hw/usb/host-linux.c
@@ -1224,7 +1224,8 @@ static int usb_linux_update_endp_table(USBHostDevice *s)
                 usb_ep_set_type(&s->dev, pid, ep, type);
                 usb_ep_set_ifnum(&s->dev, pid, ep, interface);
                 if ((s->options & (1 << USB_HOST_OPT_PIPELINE)) &&
-                    (type == USB_ENDPOINT_XFER_BULK)) {
+                    (type == USB_ENDPOINT_XFER_BULK) &&
+                    (pid == USB_TOKEN_OUT)) {
                     usb_ep_set_pipeline(&s->dev, pid, ep, true);
                 }
 
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index 2283565..22f671b 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -1270,6 +1270,16 @@ static void usbredir_interface_info(void *priv,
     }
 }
 
+static void usbredir_set_pipeline(USBRedirDevice *dev, struct USBEndpoint *uep)
+{
+    if (uep->type != USB_ENDPOINT_XFER_BULK) {
+        return;
+    }
+    if (uep->pid == USB_TOKEN_OUT) {
+        uep->pipeline = true;
+    }
+}
+
 static void usbredir_ep_info(void *priv,
     struct usb_redir_ep_info_header *ep_info)
 {
@@ -1311,9 +1321,7 @@ static void usbredir_ep_info(void *priv,
             dev->endpoint[i].max_packet_size =
                 usb_ep->max_packet_size = ep_info->max_packet_size[i];
         }
-        if (ep_info->type[i] == usb_redir_type_bulk) {
-            usb_ep->pipeline = true;
-        }
+        usbredir_set_pipeline(dev, usb_ep);
     }
 }
 
@@ -1574,9 +1582,7 @@ static int usbredir_post_load(void *priv, int version_id)
         usb_ep->type = dev->endpoint[i].type;
         usb_ep->ifnum = dev->endpoint[i].interface;
         usb_ep->max_packet_size = dev->endpoint[i].max_packet_size;
-        if (dev->endpoint[i].type == usb_redir_type_bulk) {
-            usb_ep->pipeline = true;
-        }
+        usbredir_set_pipeline(dev, usb_ep);
     }
     return 0;
 }
commit 0cae7b1a004d6857e3bde3d511d7efa39d3cb48a
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:14:08 2012 +0200

    usb: Move clearing of queue on halt to the core
    
    hcds which queue up more then one packet at once (uhci, ehci and xhci),
    must clear the queue after an error which has caused the queue to halt.
    
    Currently this is handled as a special case inside the hcd code, this
    patch instead adds an USB_RET_REMOVE_FROM_QUEUE packet result code, teaches
    the 3 hcds about this and moves the clearing of the queue on a halt into
    the USB core.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb.h b/hw/usb.h
index 435cd42..ead03c9 100644
--- a/hw/usb.h
+++ b/hw/usb.h
@@ -45,6 +45,7 @@
 #define USB_RET_IOERROR           (-5)
 #define USB_RET_ASYNC             (-6)
 #define USB_RET_ADD_TO_QUEUE      (-7)
+#define USB_RET_REMOVE_FROM_QUEUE (-8)
 
 #define USB_SPEED_LOW   0
 #define USB_SPEED_FULL  1
diff --git a/hw/usb/core.c b/hw/usb/core.c
index 014e3ac..5a97a0e 100644
--- a/hw/usb/core.c
+++ b/hw/usb/core.c
@@ -442,8 +442,14 @@ void usb_packet_complete(USBDevice *dev, USBPacket *p)
     usb_packet_check_state(p, USB_PACKET_ASYNC);
     usb_packet_complete_one(dev, p);
 
-    while (!ep->halted && !QTAILQ_EMPTY(&ep->queue)) {
+    while (!QTAILQ_EMPTY(&ep->queue)) {
         p = QTAILQ_FIRST(&ep->queue);
+        if (ep->halted) {
+            /* Empty the queue on a halt */
+            p->result = USB_RET_REMOVE_FROM_QUEUE;
+            dev->port->ops->complete(dev->port, p);
+            continue;
+        }
         if (p->state == USB_PACKET_ASYNC) {
             break;
         }
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index d11311e..74a2587 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -1457,8 +1457,15 @@ static void ehci_async_complete_packet(USBPort *port, USBPacket *packet)
     }
 
     p = container_of(packet, EHCIPacket, packet);
-    trace_usb_ehci_packet_action(p->queue, p, "wakeup");
     assert(p->async == EHCI_ASYNC_INFLIGHT);
+
+    if (packet->result == USB_RET_REMOVE_FROM_QUEUE) {
+        trace_usb_ehci_packet_action(p->queue, p, "remove");
+        ehci_free_packet(p);
+        return;
+    }
+
+    trace_usb_ehci_packet_action(p->queue, p, "wakeup");
     p->async = EHCI_ASYNC_FINISHED;
     p->usb_status = packet->result;
 
@@ -2216,19 +2223,6 @@ static int ehci_state_writeback(EHCIQueue *q)
      * bit is clear.
      */
     if (q->qh.token & QTD_TOKEN_HALT) {
-        /*
-         * We should not do any further processing on a halted queue!
-         * This is esp. important for bulk endpoints with pipelining enabled
-         * (redirection to a real USB device), where we must cancel all the
-         * transfers after this one so that:
-         * 1) If they've completed already, they are not processed further
-         *    causing more stalls, originating from the same failed transfer
-         * 2) If still in flight, they are cancelled before the guest does
-         *    a clear stall, otherwise the guest and device can loose sync!
-         */
-        while ((p = QTAILQ_FIRST(&q->packets)) != NULL) {
-            ehci_free_packet(p);
-        }
         ehci_set_state(q->ehci, q->async, EST_HORIZONTALQH);
         again = 1;
     } else {
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 46e544b..00dc9d5 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -744,22 +744,6 @@ static int uhci_complete_td(UHCIState *s, UHCI_TD *td, UHCIAsync *async, uint32_
     return TD_RESULT_COMPLETE;
 
 out:
-    /*
-     * We should not do any further processing on a queue with errors!
-     * This is esp. important for bulk endpoints with pipelining enabled
-     * (redirection to a real USB device), where we must cancel all the
-     * transfers after this one so that:
-     * 1) If they've completed already, they are not processed further
-     *    causing more stalls, originating from the same failed transfer
-     * 2) If still in flight, they are cancelled before the guest does
-     *    a clear stall, otherwise the guest and device can loose sync!
-     */
-    while (!QTAILQ_EMPTY(&async->queue->asyncs)) {
-        UHCIAsync *as = QTAILQ_FIRST(&async->queue->asyncs);
-        uhci_async_unlink(as);
-        uhci_async_cancel(as);
-    }
-
     switch(ret) {
     case USB_RET_STALL:
         td->ctrl |= TD_CTRL_STALL;
@@ -918,6 +902,12 @@ static void uhci_async_complete(USBPort *port, USBPacket *packet)
     UHCIAsync *async = container_of(packet, UHCIAsync, packet);
     UHCIState *s = async->queue->uhci;
 
+    if (packet->result == USB_RET_REMOVE_FROM_QUEUE) {
+        uhci_async_unlink(async);
+        uhci_async_cancel(async);
+        return;
+    }
+
     if (async->isoc) {
         UHCI_TD td;
         uint32_t link = async->td;
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index e8929a0..1f437cc 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -2839,6 +2839,10 @@ static void xhci_complete(USBPort *port, USBPacket *packet)
 {
     XHCITransfer *xfer = container_of(packet, XHCITransfer, packet);
 
+    if (packet->result == USB_RET_REMOVE_FROM_QUEUE) {
+        xhci_ep_nuke_one_xfer(xfer);
+        return;
+    }
     xhci_complete_packet(xfer, packet->result);
     xhci_kick_ep(xfer->xhci, xfer->slotid, xfer->epid);
 }
commit 36dfe324fd4b5efd9ef1a5b4c352bbb158952e24
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:14:07 2012 +0200

    usb: Add USB_RET_ADD_TO_QUEUE packet result code
    
    This can be used by usb-device code which wishes to process an entire endpoint
    queue at once, to do this the usb-device code returns USB_RET_ADD_TO_QUEUE
    from its handle_data class method and defines a flush_ep_queue class method
    to call when the hcd is done queuing up packets.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb.h b/hw/usb.h
index 01dd423..435cd42 100644
--- a/hw/usb.h
+++ b/hw/usb.h
@@ -38,12 +38,13 @@
 #define USB_TOKEN_IN    0x69 /* device -> host */
 #define USB_TOKEN_OUT   0xe1 /* host -> device */
 
-#define USB_RET_NODEV   (-1)
-#define USB_RET_NAK     (-2)
-#define USB_RET_STALL   (-3)
-#define USB_RET_BABBLE  (-4)
-#define USB_RET_IOERROR (-5)
-#define USB_RET_ASYNC   (-6)
+#define USB_RET_NODEV             (-1)
+#define USB_RET_NAK               (-2)
+#define USB_RET_STALL             (-3)
+#define USB_RET_BABBLE            (-4)
+#define USB_RET_IOERROR           (-5)
+#define USB_RET_ASYNC             (-6)
+#define USB_RET_ADD_TO_QUEUE      (-7)
 
 #define USB_SPEED_LOW   0
 #define USB_SPEED_FULL  1
@@ -293,6 +294,12 @@ typedef struct USBDeviceClass {
     void (*set_interface)(USBDevice *dev, int interface,
                           int alt_old, int alt_new);
 
+    /*
+     * Called when the hcd is done queuing packets for an endpoint, only
+     * necessary for devices which can return USB_RET_ADD_TO_QUEUE.
+     */
+    void (*flush_ep_queue)(USBDevice *dev, USBEndpoint *ep);
+
     const char *product_desc;
     const USBDesc *usb_desc;
 } USBDeviceClass;
@@ -507,6 +514,8 @@ int usb_device_handle_data(USBDevice *dev, USBPacket *p);
 void usb_device_set_interface(USBDevice *dev, int interface,
                               int alt_old, int alt_new);
 
+void usb_device_flush_ep_queue(USBDevice *dev, USBEndpoint *ep);
+
 const char *usb_device_get_product_desc(USBDevice *dev);
 
 const USBDesc *usb_device_get_usb_desc(USBDevice *dev);
diff --git a/hw/usb/bus.c b/hw/usb/bus.c
index b649360..8066291 100644
--- a/hw/usb/bus.c
+++ b/hw/usb/bus.c
@@ -181,6 +181,14 @@ void usb_device_set_interface(USBDevice *dev, int interface,
     }
 }
 
+void usb_device_flush_ep_queue(USBDevice *dev, USBEndpoint *ep)
+{
+    USBDeviceClass *klass = USB_DEVICE_GET_CLASS(dev);
+    if (klass->flush_ep_queue) {
+        klass->flush_ep_queue(dev, ep);
+    }
+}
+
 static int usb_qdev_init(DeviceState *qdev)
 {
     USBDevice *dev = USB_DEVICE(qdev);
diff --git a/hw/usb/core.c b/hw/usb/core.c
index e2e31ca..014e3ac 100644
--- a/hw/usb/core.c
+++ b/hw/usb/core.c
@@ -393,6 +393,10 @@ int usb_handle_packet(USBDevice *dev, USBPacket *p)
         if (ret == USB_RET_ASYNC) {
             usb_packet_set_state(p, USB_PACKET_ASYNC);
             QTAILQ_INSERT_TAIL(&p->ep->queue, p, queue);
+        } else if (ret == USB_RET_ADD_TO_QUEUE) {
+            usb_packet_set_state(p, USB_PACKET_QUEUED);
+            QTAILQ_INSERT_TAIL(&p->ep->queue, p, queue);
+            ret = USB_RET_ASYNC;
         } else {
             /*
              * When pipelining is enabled usb-devices must always return async,
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index a02fe96..d11311e 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -2072,6 +2072,7 @@ static int ehci_state_horizqh(EHCIQueue *q)
 
 static int ehci_fill_queue(EHCIPacket *p)
 {
+    USBEndpoint *ep = p->packet.ep;
     EHCIQueue *q = p->queue;
     EHCIqtd qtd = p->qtd;
     uint32_t qtdaddr, start_addr = p->qtdaddr;
@@ -2107,6 +2108,9 @@ static int ehci_fill_queue(EHCIPacket *p)
         assert(p->usb_status == USB_RET_ASYNC);
         p->async = EHCI_ASYNC_INFLIGHT;
     }
+    if (p->usb_status != USB_RET_PROCERR) {
+        usb_device_flush_ep_queue(ep->dev, ep);
+    }
     return p->usb_status;
 }
 
diff --git a/hw/usb/hcd-musb.c b/hw/usb/hcd-musb.c
index dc114fe..212dd12 100644
--- a/hw/usb/hcd-musb.c
+++ b/hw/usb/hcd-musb.c
@@ -635,6 +635,7 @@ static void musb_packet(MUSBState *s, MUSBEndPoint *ep,
     ret = usb_handle_packet(dev, &ep->packey[dir].p);
 
     if (ret == USB_RET_ASYNC) {
+        usb_device_flush_ep_queue(dev, uep);
         ep->status[dir] = len;
         return;
     }
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index 0cc1e5d..31dcfbb 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -816,6 +816,7 @@ static int ohci_service_iso_td(OHCIState *ohci, struct ohci_ed *ed,
         usb_packet_addbuf(&ohci->usb_packet, ohci->usb_buf, len);
         ret = usb_handle_packet(dev, &ohci->usb_packet);
         if (ret == USB_RET_ASYNC) {
+            usb_device_flush_ep_queue(dev, ep);
             return 1;
         }
     }
@@ -1018,6 +1019,7 @@ static int ohci_service_td(OHCIState *ohci, struct ohci_ed *ed)
         DPRINTF("ret=%d\n", ret);
 #endif
         if (ret == USB_RET_ASYNC) {
+            usb_device_flush_ep_queue(dev, ep);
             ohci->async_td = addr;
             return 1;
         }
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 600d095..46e544b 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -818,7 +818,8 @@ out:
 }
 
 static int uhci_handle_td(UHCIState *s, uint32_t addr, UHCI_TD *td,
-                          uint32_t *int_mask, bool queuing)
+                          uint32_t *int_mask, bool queuing,
+                          struct USBEndpoint **ep_ret)
 {
     UHCIAsync *async;
     int len = 0, max_len;
@@ -870,6 +871,9 @@ static int uhci_handle_td(UHCIState *s, uint32_t addr, UHCI_TD *td,
 
     dev = uhci_find_device(s, (td->token >> 8) & 0x7f);
     ep = usb_ep_get(dev, pid, (td->token >> 15) & 0xf);
+    if (ep_ret) {
+        *ep_ret = ep;
+    }
     usb_packet_setup(&async->packet, pid, ep, addr);
     qemu_sglist_add(&async->sgl, td->buffer, max_len);
     usb_packet_map(&async->packet, &async->sgl);
@@ -983,7 +987,7 @@ static int qhdb_insert(QhDb *db, uint32_t addr)
     return 0;
 }
 
-static void uhci_fill_queue(UHCIState *s, UHCI_TD *td)
+static void uhci_fill_queue(UHCIState *s, UHCI_TD *td, struct USBEndpoint *ep)
 {
     uint32_t int_mask = 0;
     uint32_t plink = td->link;
@@ -1005,7 +1009,7 @@ static void uhci_fill_queue(UHCIState *s, UHCI_TD *td)
             break;
         }
         trace_usb_uhci_td_queue(plink & ~0xf, ptd.ctrl, ptd.token);
-        ret = uhci_handle_td(s, plink, &ptd, &int_mask, true);
+        ret = uhci_handle_td(s, plink, &ptd, &int_mask, true, NULL);
         if (ret == TD_RESULT_ASYNC_CONT) {
             break;
         }
@@ -1013,12 +1017,14 @@ static void uhci_fill_queue(UHCIState *s, UHCI_TD *td)
         assert(int_mask == 0);
         plink = ptd.link;
     }
+    usb_device_flush_ep_queue(ep->dev, ep);
 }
 
 static void uhci_process_frame(UHCIState *s)
 {
     uint32_t frame_addr, link, old_td_ctrl, val, int_mask;
     uint32_t curr_qh, td_count = 0;
+    struct USBEndpoint *curr_ep;
     int cnt, ret;
     UHCI_TD td;
     UHCI_QH qh;
@@ -1089,7 +1095,7 @@ static void uhci_process_frame(UHCIState *s)
         trace_usb_uhci_td_load(curr_qh & ~0xf, link & ~0xf, td.ctrl, td.token);
 
         old_td_ctrl = td.ctrl;
-        ret = uhci_handle_td(s, link, &td, &int_mask, false);
+        ret = uhci_handle_td(s, link, &td, &int_mask, false, &curr_ep);
         if (old_td_ctrl != td.ctrl) {
             /* update the status bits of the TD */
             val = cpu_to_le32(td.ctrl);
@@ -1108,7 +1114,7 @@ static void uhci_process_frame(UHCIState *s)
 
         case TD_RESULT_ASYNC_START:
             trace_usb_uhci_td_async(curr_qh & ~0xf, link & ~0xf);
-            uhci_fill_queue(s, &td);
+            uhci_fill_queue(s, &td, curr_ep);
             link = curr_qh ? qh.link : td.link;
             continue;
 
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 47d5702..e8929a0 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -1652,6 +1652,7 @@ static int xhci_fire_transfer(XHCIState *xhci, XHCITransfer *xfer, XHCIEPContext
 static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, unsigned int epid)
 {
     XHCIEPContext *epctx;
+    USBEndpoint *ep = NULL;
     uint64_t mfindex;
     int length;
     int i;
@@ -1745,12 +1746,14 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, unsigned int epid
         if (epid == 1) {
             if (xhci_fire_ctl_transfer(xhci, xfer) >= 0) {
                 epctx->next_xfer = (epctx->next_xfer + 1) % TD_QUEUE;
+                ep = xfer->packet.ep;
             } else {
                 fprintf(stderr, "xhci: error firing CTL transfer\n");
             }
         } else {
             if (xhci_fire_transfer(xhci, xfer, epctx) >= 0) {
                 epctx->next_xfer = (epctx->next_xfer + 1) % TD_QUEUE;
+                ep = xfer->packet.ep;
             } else {
                 if (!xfer->iso_xfer) {
                     fprintf(stderr, "xhci: error firing data transfer\n");
@@ -1767,6 +1770,9 @@ static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, unsigned int epid
             break;
         }
     }
+    if (ep) {
+        usb_device_flush_ep_queue(ep->dev, ep);
+    }
 }
 
 static TRBCCode xhci_enable_slot(XHCIState *xhci, unsigned int slotid)
commit d0ff81b871af3c6cf31c807dfdd480b9d1032780
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:14:06 2012 +0200

    usb: Rename __usb_packet_complete to usb_packet_complete_one
    
    And make it available for use outside of core.c
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb.h b/hw/usb.h
index 48c8926..01dd423 100644
--- a/hw/usb.h
+++ b/hw/usb.h
@@ -370,6 +370,7 @@ USBDevice *usb_find_device(USBPort *port, uint8_t addr);
 
 int usb_handle_packet(USBDevice *dev, USBPacket *p);
 void usb_packet_complete(USBDevice *dev, USBPacket *p);
+void usb_packet_complete_one(USBDevice *dev, USBPacket *p);
 void usb_cancel_packet(USBPacket * p);
 
 void usb_ep_init(USBDevice *dev);
diff --git a/hw/usb/core.c b/hw/usb/core.c
index b9f1f7a..e2e31ca 100644
--- a/hw/usb/core.c
+++ b/hw/usb/core.c
@@ -412,10 +412,11 @@ int usb_handle_packet(USBDevice *dev, USBPacket *p)
     return ret;
 }
 
-static void __usb_packet_complete(USBDevice *dev, USBPacket *p)
+void usb_packet_complete_one(USBDevice *dev, USBPacket *p)
 {
     USBEndpoint *ep = p->ep;
 
+    assert(QTAILQ_FIRST(&ep->queue) == p);
     assert(p->result != USB_RET_ASYNC && p->result != USB_RET_NAK);
 
     if (p->result < 0) {
@@ -435,8 +436,7 @@ void usb_packet_complete(USBDevice *dev, USBPacket *p)
     int ret;
 
     usb_packet_check_state(p, USB_PACKET_ASYNC);
-    assert(QTAILQ_FIRST(&ep->queue) == p);
-    __usb_packet_complete(dev, p);
+    usb_packet_complete_one(dev, p);
 
     while (!ep->halted && !QTAILQ_EMPTY(&ep->queue)) {
         p = QTAILQ_FIRST(&ep->queue);
@@ -450,7 +450,7 @@ void usb_packet_complete(USBDevice *dev, USBPacket *p)
             break;
         }
         p->result = ret;
-        __usb_packet_complete(ep->dev, p);
+        usb_packet_complete_one(ep->dev, p);
     }
 }
 
commit 3151f2096dd676dc42e81ef0d55ae80780fd8769
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:14:05 2012 +0200

    xhci: Add a xhci_ep_nuke_one_xfer helper function
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 37b3dbb..47d5702 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -1082,6 +1082,35 @@ static TRBCCode xhci_enable_ep(XHCIState *xhci, unsigned int slotid,
     return CC_SUCCESS;
 }
 
+static int xhci_ep_nuke_one_xfer(XHCITransfer *t)
+{
+    int killed = 0;
+
+    if (t->running_async) {
+        usb_cancel_packet(&t->packet);
+        t->running_async = 0;
+        t->cancelled = 1;
+        DPRINTF("xhci: cancelling transfer, waiting for it to complete\n");
+        killed = 1;
+    }
+    if (t->running_retry) {
+        XHCIEPContext *epctx = t->xhci->slots[t->slotid-1].eps[t->epid-1];
+        if (epctx) {
+            epctx->retry = NULL;
+            qemu_del_timer(epctx->kick_timer);
+        }
+        t->running_retry = 0;
+    }
+    if (t->trbs) {
+        g_free(t->trbs);
+    }
+
+    t->trbs = NULL;
+    t->trb_count = t->trb_alloced = 0;
+
+    return killed;
+}
+
 static int xhci_ep_nuke_xfers(XHCIState *xhci, unsigned int slotid,
                                unsigned int epid)
 {
@@ -1103,25 +1132,7 @@ static int xhci_ep_nuke_xfers(XHCIState *xhci, unsigned int slotid,
 
     xferi = epctx->next_xfer;
     for (i = 0; i < TD_QUEUE; i++) {
-        XHCITransfer *t = &epctx->transfers[xferi];
-        if (t->running_async) {
-            usb_cancel_packet(&t->packet);
-            t->running_async = 0;
-            t->cancelled = 1;
-            DPRINTF("xhci: cancelling transfer %d, waiting for it to complete...\n", i);
-            killed++;
-        }
-        if (t->running_retry) {
-            t->running_retry = 0;
-            epctx->retry = NULL;
-            qemu_del_timer(epctx->kick_timer);
-        }
-        if (t->trbs) {
-            g_free(t->trbs);
-        }
-
-        t->trbs = NULL;
-        t->trb_count = t->trb_alloced = 0;
+        killed += xhci_ep_nuke_one_xfer(&epctx->transfers[xferi]);
         xferi = (xferi + 1) % TD_QUEUE;
     }
     return killed;
commit b4ea86649915eca5551a5166f76e7a9d9032de50
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:14:04 2012 +0200

    ehci: Retry to fill the queue while waiting for td completion
    
    If the guest is using multiple transfers to try and keep the usb bus busy /
    used at maximum efficiency, currently we would see / do the following:
    
    1) submit transfer 1 to the device
    2) submit transfer 2 to the device
    3) report transfer 1 completion to guest
    4) report transfer 2 completion to guest
    5) submit transfer 1 to the device
    6) report transfer 1 completion to guest
    7) submit transfer 2 to the device
    8) report transfer 2 completion to guest
    etc.
    
    So after the initial submission we would effectively only have 1 transfer
    in flight, rather then 2. This is caused by us not checking the queue for
    addition of new transfers by the guest (ie the resubmission of a recently
    finished transfer), while waiting for a pending transfer to complete.
    This patch does add a check for this, changing the sequence to:
    
    1) submit transfer 1 to the device
    2) submit transfer 2 to the device
    3) report transfer 1 completion to guest
    4) submit transfer 1 to the device
    5) report transfer 2 completion to guest
    6) submit transfer 2 to the device
    etc.
    
    Thus keeping 2 transfers in flight (most of the time, and always 1),
    as intended by the guest.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 8bd87c7..a02fe96 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -381,7 +381,7 @@ struct EHCIQueue {
     uint32_t qhaddr;       /* address QH read from                 */
     uint32_t qtdaddr;      /* address QTD read from                */
     USBDevice *dev;
-    QTAILQ_HEAD(, EHCIPacket) packets;
+    QTAILQ_HEAD(pkts_head, EHCIPacket) packets;
 };
 
 typedef QTAILQ_HEAD(EHCIQueueHead, EHCIQueue) EHCIQueueHead;
@@ -488,6 +488,7 @@ static const char *ehci_mmio_names[] = {
 
 static int ehci_state_executing(EHCIQueue *q);
 static int ehci_state_writeback(EHCIQueue *q);
+static int ehci_fill_queue(EHCIPacket *p);
 
 static const char *nr2str(const char **n, size_t len, uint32_t nr)
 {
@@ -1994,7 +1995,7 @@ static int ehci_state_fetchqtd(EHCIQueue *q)
 {
     EHCIqtd qtd;
     EHCIPacket *p;
-    int again = 0;
+    int again = 1;
 
     get_dwords(q->ehci, NLPTR_GET(q->qtdaddr), (uint32_t *) &qtd,
                sizeof(EHCIqtd) >> 2);
@@ -2022,7 +2023,6 @@ static int ehci_state_fetchqtd(EHCIQueue *q)
             p = NULL;
         }
         ehci_set_state(q->ehci, q->async, EST_HORIZONTALQH);
-        again = 1;
     } else if (p != NULL) {
         switch (p->async) {
         case EHCI_ASYNC_NONE:
@@ -2031,6 +2031,9 @@ static int ehci_state_fetchqtd(EHCIQueue *q)
             ehci_set_state(q->ehci, q->async, EST_EXECUTE);
             break;
         case EHCI_ASYNC_INFLIGHT:
+            /* Check if the guest has added new tds to the queue */
+            again = (ehci_fill_queue(QTAILQ_LAST(&q->packets, pkts_head)) ==
+                     USB_RET_PROCERR) ? -1 : 1;
             /* Unfinished async handled packet, go horizontal */
             ehci_set_state(q->ehci, q->async, EST_HORIZONTALQH);
             break;
@@ -2042,13 +2045,11 @@ static int ehci_state_fetchqtd(EHCIQueue *q)
             ehci_set_state(q->ehci, q->async, EST_EXECUTING);
             break;
         }
-        again = 1;
     } else {
         p = ehci_alloc_packet(q);
         p->qtdaddr = q->qtdaddr;
         p->qtd = qtd;
         ehci_set_state(q->ehci, q->async, EST_EXECUTE);
-        again = 1;
     }
 
     return again;
commit e3a36bce1d0123d003855f7731494e6d6f550fcc
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:14:03 2012 +0200

    ehci: Detect going in circles when filling the queue
    
    For ctrl endpoints Windows (atleast Win7) creates circular td lists, so far
    these were not a problem because we would stop filling the queue if altnext
    was set. Since further patches in this patchset remove the altnext check this
    does become a problem and we need detection for going in circles.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index d600f08..8bd87c7 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -2073,7 +2073,7 @@ static int ehci_fill_queue(EHCIPacket *p)
 {
     EHCIQueue *q = p->queue;
     EHCIqtd qtd = p->qtd;
-    uint32_t qtdaddr;
+    uint32_t qtdaddr, start_addr = p->qtdaddr;
 
     for (;;) {
         if (NLPTR_TBIT(qtd.altnext) == 0) {
@@ -2083,6 +2083,13 @@ static int ehci_fill_queue(EHCIPacket *p)
             break;
         }
         qtdaddr = qtd.next;
+        /*
+         * Detect circular td lists, Windows creates these, counting on the
+         * active bit going low after execution to make the queue stop.
+         */
+        if (qtdaddr == start_addr) {
+            break;
+        }
         get_dwords(q->ehci, NLPTR_GET(qtdaddr),
                    (uint32_t *) &qtd, sizeof(EHCIqtd) >> 2);
         ehci_trace_qtd(q, NLPTR_GET(qtdaddr), &qtd);
commit 44272b0f88247e2d2960c0ef19b546c206a10080
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:14:02 2012 +0200

    ehci: Speed up the timer of raising int from the async schedule
    
    Often the guest will queue up new packets in response to a packet, in the
    async schedule with its IOC flag set, completing. By speeding up the
    frame-timer, we notice these new packets earlier. This increases the
    speed (MB/s) of a Linux guest reading from a USB mass storage device by a
    factor of 1.15 on top of the "Improve latency of interrupt delivery"
    speed-ups, both with and without input pipelining enabled.
    
    I've not tested the speed-up of this patch without the
    "Improve latency of interrupt delivery" patch.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index f9ae05e..d600f08 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -443,6 +443,7 @@ struct EHCIState {
 
     uint64_t last_run_ns;
     uint32_t async_stepdown;
+    bool int_req_by_async;
 };
 
 #define SET_LAST_RUN_CLOCK(s) \
@@ -1529,6 +1530,9 @@ static void ehci_execute_complete(EHCIQueue *q)
 
     if (q->qh.token & QTD_TOKEN_IOC) {
         ehci_raise_irq(q->ehci, USBSTS_INT);
+        if (q->async) {
+            q->ehci->int_req_by_async = true;
+        }
     }
 }
 
@@ -2504,8 +2508,15 @@ static void ehci_frame_timer(void *opaque)
     }
 
     if (need_timer) {
-        expire_time = t_now + (get_ticks_per_sec()
+        /* If we've raised int, we speed up the timer, so that we quickly
+         * notice any new packets queued up in response */
+        if (ehci->int_req_by_async && (ehci->usbsts & USBSTS_INT)) {
+            expire_time = t_now + get_ticks_per_sec() / (FRAME_TIMER_FREQ * 2);
+            ehci->int_req_by_async = false;
+        } else {
+            expire_time = t_now + (get_ticks_per_sec()
                                * (ehci->async_stepdown+1) / FRAME_TIMER_FREQ);
+        }
         qemu_mod_timer(ehci->frame_timer, expire_time);
     }
 }
commit 0262f65aaae49d582e7d4e4b1b5c8cfe4cd19d6d
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:14:01 2012 +0200

    ehci: Improve latency of interrupt delivery and async schedule scanning
    
    While doing various performance tests of reading from USB mass storage devices
    I noticed the following::
    1) When an async handled packet completes, we don't immediately report an
       interrupt to the guest, instead we wait for the frame-timer to run and
       report it from there
    2) If 1) has been fixed and an async handled packet takes a while to complete,
       then async_stepdown will become a high value, which means that there
       will be a large latency before any new packets queued by the guest in
       response to the interrupt get seen
    
    1) was done deliberately as part of commit f0ad01f92:
    http://www.kraxel.org/cgit/qemu/commit/?h=usb.57&id=f0ad01f92ca02eee7cadbfd225c5de753ebd5fce
    Since setting the interrupt immediately on async packet completion was causing
    issues with Linux guests, I believe this recently fixed Linux bug explains
    why this is happening:
    http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=commitdiff;h=361aabf395e4a23cf554cf4ec0c0c6963b8beb01
    
    Note that we can *not* count on this fix being present in all Linux guests!
    
    I was hoping that the recently added support for Interrupt Threshold Control
    would fix the issues with Linux guests, but adding a simple ehci_commit_irq()
    call to ehci_async_bh() still caused problems with Linux guests.
    
    The problem is, that when doing ehci_commit_irq() from ehci_async_bh(),
    the "old" frindex value is used to calculate usbsts_frindex, and when
    the frame-timer then runs possibly very shortly after ehci_async_bh(),
    it increases the frame-timer, and thus any interrupts raised from that
    frame-timer run, will also get reported to the guest immediately, rather
    then being delayed to the next frame-timer run.
    
    Luckily the solution for this is simple, this means that we need to
    increase frindex before calling ehci_commit_irq() from ehci_async_bh(),
    which in the end boils down to simple calling ehci_frame_timer() instead
    of ehci_async_bh() from the bh.
    
    This may seem like it causes a lot of extra work to be done, but this
    is not true. Any work done from the frame-timer processing the periodic
    schedule is work which then does not need to be done the next time the
    frame timer runs, also the frame-timer will re-arm itself at (possibly)
    a later time then it was armed for saving a vmexit at that time.
    
    As an additional advantage moving to simply calling the frame-timer also
    fixes 2) as the packet completion will set async_stepdown to 0, and the
    re-arming of the timer with an async_stepdown of 0 ensures that any
    newly queued up packets get seen in a reasonable amount of time.
    
    This improves the speed (MB/s) of a Linux guest reading from a USB mass
    storage device by a factor of 1.5 - 1.7 with input pipelining disabled,
    and by a factor of 1.8 with input pipelining enabled.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 8b4e3c8..f9ae05e 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -1244,7 +1244,7 @@ static void ehci_opreg_write(void *ptr, hwaddr addr,
             s->usbcmd = val; /* Set usbcmd for ehci_update_halt() */
             ehci_update_halt(s);
             s->async_stepdown = 0;
-            qemu_mod_timer(s->frame_timer, qemu_get_clock_ns(vm_clock));
+            qemu_bh_schedule(s->async_bh);
         }
         break;
 
@@ -2510,12 +2510,6 @@ static void ehci_frame_timer(void *opaque)
     }
 }
 
-static void ehci_async_bh(void *opaque)
-{
-    EHCIState *ehci = opaque;
-    ehci_advance_async_state(ehci);
-}
-
 static const MemoryRegionOps ehci_mmio_caps_ops = {
     .read = ehci_caps_read,
     .valid.min_access_size = 1,
@@ -2744,7 +2738,7 @@ static int usb_ehci_initfn(PCIDevice *dev)
     }
 
     s->frame_timer = qemu_new_timer_ns(vm_clock, ehci_frame_timer, s);
-    s->async_bh = qemu_bh_new(ehci_async_bh, s);
+    s->async_bh = qemu_bh_new(ehci_frame_timer, s);
     QTAILQ_INIT(&s->aqueues);
     QTAILQ_INIT(&s->pqueues);
     usb_packet_init(&s->ipacket);
commit cf08a8a1f600b2ac25f72cf5736247f3e95cc43d
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:14:00 2012 +0200

    ehci: Set int flag on a short input packet
    
    According to 4.15.1.2 an interrupt must be raised when a short packet
    is received. If we don't do this it may take a significant time for
    the guest to notice a short trasnfer has completed, since only the last td
    will have its IOC flag set, and a short transfer may complete in an earlier
    packet.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 82f4dbd..8b4e3c8 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -1508,6 +1508,10 @@ static void ehci_execute_complete(EHCIQueue *q)
 
         if (tbytes && p->pid == USB_TOKEN_IN) {
             tbytes -= p->usb_status;
+            if (tbytes) {
+                /* 4.15.1.2 must raise int on a short input packet */
+                ehci_raise_irq(q->ehci, USBSTS_INT);
+            }
         } else {
             tbytes = 0;
         }
commit 549a3c3d9665bdc0246fb6594b6e36ce3afadd21
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:13:59 2012 +0200

    ehci: Get rid of packet tbytes field
    
    This field is used in some places to track the tbytes field of the token, but
    in other places the field is used directly, use it directly everywhere for
    consistency.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 6c65a73..82f4dbd 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -362,7 +362,6 @@ struct EHCIPacket {
     USBPacket packet;
     QEMUSGList sgl;
     int pid;
-    uint32_t tbytes;
     enum async_state async;
     int usb_status;
 };
@@ -1505,15 +1504,16 @@ static void ehci_execute_complete(EHCIQueue *q)
         }
     } else {
         // TODO check 4.12 for splits
+        uint32_t tbytes = get_field(q->qh.token, QTD_TOKEN_TBYTES);
 
-        if (p->tbytes && p->pid == USB_TOKEN_IN) {
-            p->tbytes -= p->usb_status;
+        if (tbytes && p->pid == USB_TOKEN_IN) {
+            tbytes -= p->usb_status;
         } else {
-            p->tbytes = 0;
+            tbytes = 0;
         }
 
-        DPRINTF("updating tbytes to %d\n", p->tbytes);
-        set_field(&q->qh.token, p->tbytes, QTD_TOKEN_TBYTES);
+        DPRINTF("updating tbytes to %d\n", tbytes);
+        set_field(&q->qh.token, tbytes, QTD_TOKEN_TBYTES);
     }
     ehci_finish_transfer(q, p->usb_status);
     usb_packet_unmap(&p->packet, &p->sgl);
@@ -1544,8 +1544,7 @@ static int ehci_execute(EHCIPacket *p, const char *action)
         return USB_RET_PROCERR;
     }
 
-    p->tbytes = (p->qtd.token & QTD_TOKEN_TBYTES_MASK) >> QTD_TOKEN_TBYTES_SH;
-    if (p->tbytes > BUFF_SIZE) {
+    if (get_field(p->qtd.token, QTD_TOKEN_TBYTES) > BUFF_SIZE) {
         ehci_trace_guest_bug(p->queue->ehci,
                              "guest requested more bytes than allowed");
         return USB_RET_PROCERR;
@@ -1582,10 +1581,9 @@ static int ehci_execute(EHCIPacket *p, const char *action)
 
     trace_usb_ehci_packet_action(p->queue, p, action);
     ret = usb_handle_packet(p->queue->dev, &p->packet);
-    DPRINTF("submit: qh %x next %x qtd %x pid %x len %zd "
-            "(total %d) endp %x ret %d\n",
+    DPRINTF("submit: qh %x next %x qtd %x pid %x len %zd endp %x ret %d\n",
             q->qhaddr, q->qh.next, q->qtdaddr, q->pid,
-            q->packet.iov.size, q->tbytes, endp, ret);
+            q->packet.iov.size, endp, ret);
 
     if (ret > BUFF_SIZE) {
         fprintf(stderr, "ret from usb_handle_packet > BUFF_SIZE\n");
commit 7c2eaca4efa46e02caaec4ca7ddf05a6e461da94
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:13:58 2012 +0200

    uhci: Move checks to continue queuing to uhci_fill_queue()
    
    Rather then having a special check to start queuing after the first packet,
    and then another check for the other packets in uhci_fill_queue(), simply
    check the previous packet beforehand in uhci_fill_queue()
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index 671c712..600d095 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -991,7 +991,8 @@ static void uhci_fill_queue(UHCIState *s, UHCI_TD *td)
     UHCI_TD ptd;
     int ret;
 
-    while (is_valid(plink)) {
+    ptd.ctrl = td->ctrl;
+    while (is_valid(plink) && !(ptd.ctrl & TD_CTRL_SPD)) {
         pci_dma_read(&s->dev, plink & ~0xf, &ptd, sizeof(ptd));
         le32_to_cpus(&ptd.link);
         le32_to_cpus(&ptd.ctrl);
@@ -1010,9 +1011,6 @@ static void uhci_fill_queue(UHCIState *s, UHCI_TD *td)
         }
         assert(ret == TD_RESULT_ASYNC_START);
         assert(int_mask == 0);
-        if (ptd.ctrl & TD_CTRL_SPD) {
-            break;
-        }
         plink = ptd.link;
     }
 }
@@ -1110,9 +1108,7 @@ static void uhci_process_frame(UHCIState *s)
 
         case TD_RESULT_ASYNC_START:
             trace_usb_uhci_td_async(curr_qh & ~0xf, link & ~0xf);
-            if (is_valid(td.link) && !(td.ctrl & TD_CTRL_SPD)) {
-                uhci_fill_queue(s, &td);
-            }
+            uhci_fill_queue(s, &td);
             link = curr_qh ? qh.link : td.link;
             continue;
 
commit 00a0770de3ddb803a0f81f6aea40b0f945154a68
Author: Hans de Goede <hdegoede at redhat.com>
Date:   Wed Oct 24 18:13:57 2012 +0200

    uhci: Properly unmap packets on cancel / invalid pid
    
    Packets with an invalid pid, or which were cancelled have
    usb_packet_map() called on them on init, but not usb_packet_unmap()
    before being freed.
    
    Signed-off-by: Hans de Goede <hdegoede at redhat.com>
    Signed-off-by: Gerd Hoffmann <kraxel at redhat.com>

diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index c2f08e3..671c712 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -236,6 +236,7 @@ static void uhci_async_cancel(UHCIAsync *async)
     trace_usb_uhci_packet_cancel(async->queue->token, async->td, async->done);
     if (!async->done)
         usb_cancel_packet(&async->packet);
+    usb_packet_unmap(&async->packet, &async->sgl);
     uhci_async_free(async);
 }
 
@@ -887,6 +888,7 @@ static int uhci_handle_td(UHCIState *s, uint32_t addr, UHCI_TD *td,
 
     default:
         /* invalid pid : frame interrupted */
+        usb_packet_unmap(&async->packet, &async->sgl);
         uhci_async_free(async);
         s->status |= UHCI_STS_HCPERR;
         uhci_update_irq(s);
commit 8473f377393219390ea6f2d8d450a2b054bb823e
Author: Luiz Capitulino <lcapitulino at redhat.com>
Date:   Wed Oct 24 14:12:15 2012 -0200

    win32: fix broken build due to missing QEMU_MADV_HUGEPAGE
    
    Commit ad0b5321f1f797274603ebbe20108b0750baee94 forgot to add
    QEMU_MADV_HUGEPAGE macros for when CONFIG_MADVISE is not defined.
    This broke the build for Windows. Fix it.
    
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Anthony Liguori <aliguori at us.ibm.com>

diff --git a/osdep.h b/osdep.h
index c5fd3d9..585e2c1 100644
--- a/osdep.h
+++ b/osdep.h
@@ -121,6 +121,7 @@ void qemu_vfree(void *ptr);
 #define QEMU_MADV_DONTFORK  QEMU_MADV_INVALID
 #define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
 #define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
+#define QEMU_MADV_HUGEPAGE  QEMU_MADV_INVALID
 
 #else /* no-op */
 
@@ -129,6 +130,7 @@ void qemu_vfree(void *ptr);
 #define QEMU_MADV_DONTFORK  QEMU_MADV_INVALID
 #define QEMU_MADV_MERGEABLE QEMU_MADV_INVALID
 #define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
+#define QEMU_MADV_HUGEPAGE  QEMU_MADV_INVALID
 
 #endif
 
commit c6b8141b84e1b80eff1f04e2a5feb38decae65d8
Merge: a8170e5... 6dd844d...
Author: Anthony Liguori <aliguori at us.ibm.com>
Date:   Wed Oct 24 09:39:49 2012 -0500

    Merge remote-tracking branch 'bonzini/nbd-next' into staging
    
    * bonzini/nbd-next: (30 commits)
      qmp: add NBD server commands
      block: add close notifiers
      block: prepare code for adding block notifiers
      qemu-sockets: add socket_listen, socket_connect, socket_parse
      tests: do not include tools-obj-y Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
      qemu-sockets: return InetSocketAddress from inet_parse
      qapi: add socket address types
      build: add QAPI files to the tools
      vnc: drop QERR_VNC_SERVER_FAILED
      qemu-sockets: add error propagation to Unix socket functions
      qemu-sockets: add error propagation to inet_parse
      qemu-sockets: add error propagation to inet_dgram_opts
      qemu-sockets: add error propagation to inet_connect_addr
      qemu-sockets: include strerror or gai_strerror output in error messages
      vnc: add error propagation to vnc_display_open
      vnc: reorganize code for reverse mode
      vnc: introduce a single label for error returns
      vnc: avoid Yoda conditionals
      qemu-ga: ask and print error information from qemu-sockets
      nbd: ask and print error information from qemu-sockets
      ...

commit 29ed72f15a4c8fd3ac106d874f76cc27b654fd25
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Oct 19 16:45:24 2012 +0200

    migration: go to paused state after finishing incoming migration with -S
    
    At the end of migration the machine has started already, and cannot be
    destroyed without losing the guest's data.  Hence, prelaunch is the
    wrong state.  Go to the paused state instead.  QEMU would reach that
    state anyway (after running the guest for the blink of an eye) if the
    "stop" command had been received after the start of migration.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>

diff --git a/migration.c b/migration.c
index 62e0304..e9a5822 100644
--- a/migration.c
+++ b/migration.c
@@ -102,7 +102,7 @@ void process_incoming_migration(QEMUFile *f)
     if (autostart) {
         vm_start();
     } else {
-        runstate_set(RUN_STATE_PRELAUNCH);
+        runstate_set(RUN_STATE_PAUSED);
     }
 }
 
diff --git a/vl.c b/vl.c
index ee3c43a..188af45 100644
--- a/vl.c
+++ b/vl.c
@@ -341,7 +341,7 @@ static const RunStateTransition runstate_transitions_def[] = {
     { RUN_STATE_DEBUG, RUN_STATE_RUNNING },
 
     { RUN_STATE_INMIGRATE, RUN_STATE_RUNNING },
-    { RUN_STATE_INMIGRATE, RUN_STATE_PRELAUNCH },
+    { RUN_STATE_INMIGRATE, RUN_STATE_PAUSED },
 
     { RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED },
     { RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE },
commit 1e9981465f05a0f103d7e09afd975c9c0ff6d132
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Oct 23 14:54:21 2012 +0200

    qmp: handle stop/cont in INMIGRATE state
    
    Right now, stop followed by an incoming migration will let the
    virtual machine start.  cont before an incoming migration instead
    will fail.
    
    This is bad because the actual behavior is not predictable; it is
    racy with respect to the start of the incoming migration.  That's
    because incoming migration is blocking, and thus will delay the
    processing of stop/cont until the end of the migration.
    
    In addition, there's nothing that really prevents the user from
    typing the block device's passwords before incoming migration is
    done, so returning the DeviceEncrypted error is also helpful in
    the QMP case.
    
    Both things can be fixed by just toggling the autostart variable when
    stop/cont are called in INMIGRATE state.
    
    Note that libvirt is currently working around the race by looping
    if the MigrationExpected answer is returned.  After this patch, the
    command will return right away without ever raising an error.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>

diff --git a/qapi-schema.json b/qapi-schema.json
index c615ee2..6b14edc 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -22,15 +22,11 @@
 # @KVMMissingCap: the requested operation can't be fulfilled because a
 #                 required KVM capability is missing
 #
-# @MigrationExpected: the requested operation can't be fulfilled because a
-#                     migration process is expected
-#
 # Since: 1.2
 ##
 { 'enum': 'ErrorClass',
   'data': [ 'GenericError', 'CommandNotFound', 'DeviceEncrypted',
-            'DeviceNotActive', 'DeviceNotFound', 'KVMMissingCap',
-            'MigrationExpected' ] }
+            'DeviceNotActive', 'DeviceNotFound', 'KVMMissingCap' ] }
 
 ##
 # @add_client
@@ -149,7 +145,11 @@
 #
 # @finish-migrate: guest is paused to finish the migration process
 #
-# @inmigrate: guest is paused waiting for an incoming migration
+# @inmigrate: guest is paused waiting for an incoming migration.  Note
+# that this state does not tell whether the machine will start at the
+# end of the migration.  This depends on the command-line -S option and
+# any invocation of 'stop' or 'cont' that has happened since QEMU was
+# started.
 #
 # @internal-error: An internal error that prevents further guest execution
 # has occurred
@@ -1210,7 +1210,9 @@
 # Since:  0.14.0
 #
 # Notes:  This function will succeed even if the guest is already in the stopped
-#         state
+#         state.  In "inmigrate" state, it will ensure that the guest
+#         remains paused once migration finishes, as if the -S option was
+#         passed on the command line.
 ##
 { 'command': 'stop' }
 
@@ -1299,11 +1301,14 @@
 # Since:  0.14.0
 #
 # Returns:  If successful, nothing
-#           If the QEMU is waiting for an incoming migration, MigrationExpected
 #           If QEMU was started with an encrypted block device and a key has
 #              not yet been set, DeviceEncrypted.
 #
-# Notes:  This command will succeed if the guest is currently running.
+# Notes:  This command will succeed if the guest is currently running.  It
+#         will also succeed if the guest is in the "inmigrate" state; in
+#         this case, the effect of the command is to make sure the guest
+#         starts once migration finishes, removing the effect of the -S
+#         command line option if it was passed.
 ##
 { 'command': 'cont' }
 
diff --git a/qerror.h b/qerror.h
index c91708c..5e98a39 100644
--- a/qerror.h
+++ b/qerror.h
@@ -165,9 +165,6 @@ void assert_no_error(Error *err);
 #define QERR_MIGRATION_NOT_SUPPORTED \
     ERROR_CLASS_GENERIC_ERROR, "State blocked by non-migratable device '%s'"
 
-#define QERR_MIGRATION_EXPECTED \
-    ERROR_CLASS_MIGRATION_EXPECTED, "An incoming migration is expected before this command can be executed"
-
 #define QERR_MISSING_PARAMETER \
     ERROR_CLASS_GENERIC_ERROR, "Parameter '%s' is missing"
 
diff --git a/qmp.c b/qmp.c
index 36c54c5..2c8d559 100644
--- a/qmp.c
+++ b/qmp.c
@@ -85,7 +85,11 @@ void qmp_quit(Error **err)
 
 void qmp_stop(Error **errp)
 {
-    vm_stop(RUN_STATE_PAUSED);
+    if (runstate_check(RUN_STATE_INMIGRATE)) {
+        autostart = 0;
+    } else {
+        vm_stop(RUN_STATE_PAUSED);
+    }
 }
 
 void qmp_system_reset(Error **errp)
@@ -144,10 +148,7 @@ void qmp_cont(Error **errp)
 {
     Error *local_err = NULL;
 
-    if (runstate_check(RUN_STATE_INMIGRATE)) {
-        error_set(errp, QERR_MIGRATION_EXPECTED);
-        return;
-    } else if (runstate_check(RUN_STATE_INTERNAL_ERROR) ||
+    if (runstate_check(RUN_STATE_INTERNAL_ERROR) ||
                runstate_check(RUN_STATE_SHUTDOWN)) {
         error_set(errp, QERR_RESET_REQUIRED);
         return;
@@ -162,7 +163,11 @@ void qmp_cont(Error **errp)
         return;
     }
 
-    vm_start();
+    if (runstate_check(RUN_STATE_INMIGRATE)) {
+        autostart = 1;
+    } else {
+        vm_start();
+    }
 }
 
 void qmp_system_wakeup(Error **errp)
commit 852bef0e0c03e2de9d6441471219cd3bc1bf45b5
Author: Aurelien Jarno <aurelien at aurel32.net>
Date:   Fri Oct 19 23:19:19 2012 +0200

    hmp: fix info cpus for sparc targets
    
    On sparc targets, info cpus returns this kind of output:
    
    | info cpus
    | * CPU #0: pc=0x0000000000424d18pc=0x0000000000424d18npc=0x0000000000424d1c thread_id=19460
    
    pc is printed twice, there is no space between pc, pc and npc.
    
    With this patch, pc is not printed anymore when has_npc is set. In addition
    the space is printed before pc/nip/npc/PC instead of after the colon so that
    multiple prints are possible. This result on the following kind of input on
    sparc targets:
    
    | info cpus
    | * CPU #0: pc=0x0000000000424d18 npc=0x0000000000424d1c thread_id=19460
    
    Cc: Luiz Capitulino <lcapitulino at redhat.com>
    Cc: Markus Armbruster <armbru at redhat.com>
    Cc: Blue Swirl <blauwirbel at gmail.com>
    Signed-off-by: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>

diff --git a/hmp.c b/hmp.c
index 2b97982..9df84e3 100644
--- a/hmp.c
+++ b/hmp.c
@@ -245,20 +245,19 @@ void hmp_info_cpus(Monitor *mon)
             active = '*';
         }
 
-        monitor_printf(mon, "%c CPU #%" PRId64 ": ", active, cpu->value->CPU);
+        monitor_printf(mon, "%c CPU #%" PRId64 ":", active, cpu->value->CPU);
 
         if (cpu->value->has_pc) {
-            monitor_printf(mon, "pc=0x%016" PRIx64, cpu->value->pc);
+            monitor_printf(mon, " pc=0x%016" PRIx64, cpu->value->pc);
         }
         if (cpu->value->has_nip) {
-            monitor_printf(mon, "nip=0x%016" PRIx64, cpu->value->nip);
+            monitor_printf(mon, " nip=0x%016" PRIx64, cpu->value->nip);
         }
         if (cpu->value->has_npc) {
-            monitor_printf(mon, "pc=0x%016" PRIx64, cpu->value->pc);
-            monitor_printf(mon, "npc=0x%016" PRIx64, cpu->value->npc);
+            monitor_printf(mon, " npc=0x%016" PRIx64, cpu->value->npc);
         }
         if (cpu->value->has_PC) {
-            monitor_printf(mon, "PC=0x%016" PRIx64, cpu->value->PC);
+            monitor_printf(mon, " PC=0x%016" PRIx64, cpu->value->PC);
         }
 
         if (cpu->value->halted) {
commit 8b279a60dc3ca53923701dfec6e54bea9d13cfb7
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Fri Oct 19 04:23:05 2012 +0000

    target-arm: Remove out of date FIXME regarding saturating arithmetic
    
    Remove an out of date FIXME regarding the saturating arithmetic helpers:
    we now do pass a pointer to CPUARMState to these helpers, and since
    the AREG0 changes went in there is no difference between helper.c
    and op_helper.c and therefore no point in moving the functions.
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index aef592a..6e3ab90 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -93,8 +93,6 @@ void tlb_fill(CPUARMState *env, target_ulong addr, int is_write, int mmu_idx,
 }
 #endif
 
-/* FIXME: Pass an explicit pointer to QF to CPUARMState, and move saturating
-   instructions into helper.c  */
 uint32_t HELPER(add_setq)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a + b;
commit 36c91fd115a5920a35d4cb0e7a1903281469e0db
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Thu Oct 18 17:40:41 2012 +0100

    target-arm: Implement abs_i32 inline rather than as a helper
    
    Implement abs_i32 inline (with movcond) rather than using a helper
    function.
    
    Reviewed-by: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/target-arm/helper.c b/target-arm/helper.c
index 8f2cba6..ab8b734 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -1562,11 +1562,6 @@ uint32_t HELPER(rbit)(uint32_t x)
     return x;
 }
 
-uint32_t HELPER(abs)(uint32_t x)
-{
-    return ((int32_t)x < 0) ? -x : x;
-}
-
 #if defined(CONFIG_USER_ONLY)
 
 void do_interrupt (CPUARMState *env)
diff --git a/target-arm/helper.h b/target-arm/helper.h
index fa3472f..60812de 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -13,7 +13,6 @@ DEF_HELPER_2(double_saturate, i32, env, s32)
 DEF_HELPER_FLAGS_2(sdiv, TCG_CALL_CONST | TCG_CALL_PURE, s32, s32, s32)
 DEF_HELPER_FLAGS_2(udiv, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32, i32)
 DEF_HELPER_FLAGS_1(rbit, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
-DEF_HELPER_FLAGS_1(abs, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
 
 #define PAS_OP(pfx)  \
     DEF_HELPER_3(pfx ## add8, i32, i32, i32, ptr) \
diff --git a/target-arm/translate.c b/target-arm/translate.c
index d33f94c..25433da 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -462,8 +462,15 @@ static void gen_sar(TCGv dest, TCGv t0, TCGv t1)
     tcg_temp_free_i32(tmp1);
 }
 
-/* FIXME:  Implement this natively.  */
-#define tcg_gen_abs_i32(t0, t1) gen_helper_abs(t0, t1)
+static void tcg_gen_abs_i32(TCGv dest, TCGv src)
+{
+    TCGv c0 = tcg_const_i32(0);
+    TCGv tmp = tcg_temp_new_i32();
+    tcg_gen_neg_i32(tmp, src);
+    tcg_gen_movcond_i32(TCG_COND_GT, dest, src, c0, src, tmp);
+    tcg_temp_free_i32(c0);
+    tcg_temp_free_i32(tmp);
+}
 
 static void shifter_out_im(TCGv var, int shift)
 {
commit ee6fa5593ebfc437cbedaf28de8b66b2e7d44e70
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Thu Oct 18 16:58:52 2012 +0100

    target-arm: Use TCG operation for Neon 64 bit negation
    
    Use the TCG operation to do Neon 64 bit negations rather than calling
    a helper routine for it.
    
    Reviewed-by: Aurelien Jarno <aurelien at aurel32.net>
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/target-arm/helper.h b/target-arm/helper.h
index 8b9adf1..fa3472f 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -339,7 +339,6 @@ DEF_HELPER_2(neon_mull_s16, i64, i32, i32)
 
 DEF_HELPER_1(neon_negl_u16, i64, i64)
 DEF_HELPER_1(neon_negl_u32, i64, i64)
-DEF_HELPER_1(neon_negl_u64, i64, i64)
 
 DEF_HELPER_2(neon_qabs_s8, i32, env, i32)
 DEF_HELPER_2(neon_qabs_s16, i32, env, i32)
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 9aa920d..89280b6 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -1664,12 +1664,6 @@ uint64_t HELPER(neon_negl_u32)(uint64_t x)
     return low | ((uint64_t)high << 32);
 }
 
-/* FIXME:  There should be a native op for this.  */
-uint64_t HELPER(neon_negl_u64)(uint64_t x)
-{
-    return -x;
-}
-
 /* Saturating sign manipulation.  */
 /* ??? Make these use NEON_VOP1 */
 #define DO_QABS8(x) do { \
diff --git a/target-arm/translate.c b/target-arm/translate.c
index daccb15..d33f94c 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -4184,7 +4184,9 @@ static inline void gen_neon_negl(TCGv_i64 var, int size)
     switch (size) {
     case 0: gen_helper_neon_negl_u16(var, var); break;
     case 1: gen_helper_neon_negl_u32(var, var); break;
-    case 2: gen_helper_neon_negl_u64(var, var); break;
+    case 2:
+        tcg_gen_neg_i64(var, var);
+        break;
     default: abort();
     }
 }
commit f296c0d172735612cad8af5fafb8dedeaaa2a109
Author: Peter Maydell <peter.maydell at linaro.org>
Date:   Sun Oct 14 09:52:27 2012 +0000

    arm-semi.c: Handle get/put_user() failure accessing arguments
    
    Rework the handling of arguments to ARM semihosting calls so that we
    handle a possible failure return from get_user_ual() or put_user_ual().
    (This incidentally silences a lot of warnings from clang about
    "expression result unused").
    
    Signed-off-by: Peter Maydell <peter.maydell at linaro.org>

diff --git a/target-arm/arm-semi.c b/target-arm/arm-semi.c
index 73bde58..7743d67 100644
--- a/target-arm/arm-semi.c
+++ b/target-arm/arm-semi.c
@@ -166,17 +166,20 @@ static void arm_semi_flen_cb(CPUARMState *env, target_ulong ret, target_ulong er
 #endif
 }
 
-#define ARG(n)					\
-({						\
-    target_ulong __arg;				\
-    /* FIXME - handle get_user() failure */	\
-    get_user_ual(__arg, args + (n) * 4);	\
-    __arg;					\
-})
+/* Read the input value from the argument block; fail the semihosting
+ * call if the memory read fails.
+ */
+#define GET_ARG(n) do {                                 \
+    if (get_user_ual(arg ## n, args + (n) * 4)) {       \
+        return (uint32_t)-1;                            \
+    }                                                   \
+} while (0)
+
 #define SET_ARG(n, val) put_user_ual(val, args + (n) * 4)
 uint32_t do_arm_semihosting(CPUARMState *env)
 {
     target_ulong args;
+    target_ulong arg0, arg1, arg2, arg3;
     char * s;
     int nr;
     uint32_t ret;
@@ -191,33 +194,39 @@ uint32_t do_arm_semihosting(CPUARMState *env)
     args = env->regs[1];
     switch (nr) {
     case TARGET_SYS_OPEN:
-        if (!(s = lock_user_string(ARG(0))))
+        GET_ARG(0);
+        GET_ARG(1);
+        GET_ARG(2);
+        s = lock_user_string(arg0);
+        if (!s) {
             /* FIXME - should this error code be -TARGET_EFAULT ? */
             return (uint32_t)-1;
-        if (ARG(1) >= 12) {
-            unlock_user(s, ARG(0), 0);
+        }
+        if (arg1 >= 12) {
+            unlock_user(s, arg0, 0);
             return (uint32_t)-1;
         }
         if (strcmp(s, ":tt") == 0) {
-            int result_fileno = ARG(1) < 4 ? STDIN_FILENO : STDOUT_FILENO;
-            unlock_user(s, ARG(0), 0);
+            int result_fileno = arg1 < 4 ? STDIN_FILENO : STDOUT_FILENO;
+            unlock_user(s, arg0, 0);
             return result_fileno;
         }
         if (use_gdb_syscalls()) {
-            gdb_do_syscall(arm_semi_cb, "open,%s,%x,1a4", ARG(0),
-			   (int)ARG(2)+1, gdb_open_modeflags[ARG(1)]);
+            gdb_do_syscall(arm_semi_cb, "open,%s,%x,1a4", arg0,
+                           (int)arg2+1, gdb_open_modeflags[arg1]);
             ret = env->regs[0];
         } else {
-            ret = set_swi_errno(ts, open(s, open_modeflags[ARG(1)], 0644));
+            ret = set_swi_errno(ts, open(s, open_modeflags[arg1], 0644));
         }
-        unlock_user(s, ARG(0), 0);
+        unlock_user(s, arg0, 0);
         return ret;
     case TARGET_SYS_CLOSE:
+        GET_ARG(0);
         if (use_gdb_syscalls()) {
-            gdb_do_syscall(arm_semi_cb, "close,%x", ARG(0));
+            gdb_do_syscall(arm_semi_cb, "close,%x", arg0);
             return env->regs[0];
         } else {
-            return set_swi_errno(ts, close(ARG(0)));
+            return set_swi_errno(ts, close(arg0));
         }
     case TARGET_SYS_WRITEC:
         {
@@ -248,35 +257,45 @@ uint32_t do_arm_semihosting(CPUARMState *env)
         unlock_user(s, args, 0);
         return ret;
     case TARGET_SYS_WRITE:
-        len = ARG(2);
+        GET_ARG(0);
+        GET_ARG(1);
+        GET_ARG(2);
+        len = arg2;
         if (use_gdb_syscalls()) {
             arm_semi_syscall_len = len;
-            gdb_do_syscall(arm_semi_cb, "write,%x,%x,%x", ARG(0), ARG(1), len);
+            gdb_do_syscall(arm_semi_cb, "write,%x,%x,%x", arg0, arg1, len);
             return env->regs[0];
         } else {
-            if (!(s = lock_user(VERIFY_READ, ARG(1), len, 1)))
+            s = lock_user(VERIFY_READ, arg1, len, 1);
+            if (!s) {
                 /* FIXME - should this error code be -TARGET_EFAULT ? */
                 return (uint32_t)-1;
-            ret = set_swi_errno(ts, write(ARG(0), s, len));
-            unlock_user(s, ARG(1), 0);
+            }
+            ret = set_swi_errno(ts, write(arg0, s, len));
+            unlock_user(s, arg1, 0);
             if (ret == (uint32_t)-1)
                 return -1;
             return len - ret;
         }
     case TARGET_SYS_READ:
-        len = ARG(2);
+        GET_ARG(0);
+        GET_ARG(1);
+        GET_ARG(2);
+        len = arg2;
         if (use_gdb_syscalls()) {
             arm_semi_syscall_len = len;
-            gdb_do_syscall(arm_semi_cb, "read,%x,%x,%x", ARG(0), ARG(1), len);
+            gdb_do_syscall(arm_semi_cb, "read,%x,%x,%x", arg0, arg1, len);
             return env->regs[0];
         } else {
-            if (!(s = lock_user(VERIFY_WRITE, ARG(1), len, 0)))
+            s = lock_user(VERIFY_WRITE, arg1, len, 0);
+            if (!s) {
                 /* FIXME - should this error code be -TARGET_EFAULT ? */
                 return (uint32_t)-1;
-            do
-              ret = set_swi_errno(ts, read(ARG(0), s, len));
-            while (ret == -1 && errno == EINTR);
-            unlock_user(s, ARG(1), len);
+            }
+            do {
+                ret = set_swi_errno(ts, read(arg0, s, len));
+            } while (ret == -1 && errno == EINTR);
+            unlock_user(s, arg1, len);
             if (ret == (uint32_t)-1)
                 return -1;
             return len - ret;
@@ -285,30 +304,34 @@ uint32_t do_arm_semihosting(CPUARMState *env)
        /* XXX: Read from debug console. Not implemented.  */
         return 0;
     case TARGET_SYS_ISTTY:
+        GET_ARG(0);
         if (use_gdb_syscalls()) {
-            gdb_do_syscall(arm_semi_cb, "isatty,%x", ARG(0));
+            gdb_do_syscall(arm_semi_cb, "isatty,%x", arg0);
             return env->regs[0];
         } else {
-            return isatty(ARG(0));
+            return isatty(arg0);
         }
     case TARGET_SYS_SEEK:
+        GET_ARG(0);
+        GET_ARG(1);
         if (use_gdb_syscalls()) {
-            gdb_do_syscall(arm_semi_cb, "lseek,%x,%x,0", ARG(0), ARG(1));
+            gdb_do_syscall(arm_semi_cb, "lseek,%x,%x,0", arg0, arg1);
             return env->regs[0];
         } else {
-            ret = set_swi_errno(ts, lseek(ARG(0), ARG(1), SEEK_SET));
+            ret = set_swi_errno(ts, lseek(arg0, arg1, SEEK_SET));
             if (ret == (uint32_t)-1)
               return -1;
             return 0;
         }
     case TARGET_SYS_FLEN:
+        GET_ARG(0);
         if (use_gdb_syscalls()) {
             gdb_do_syscall(arm_semi_flen_cb, "fstat,%x,%x",
-			   ARG(0), env->regs[13]-64);
+                           arg0, env->regs[13]-64);
             return env->regs[0];
         } else {
             struct stat buf;
-            ret = set_swi_errno(ts, fstat(ARG(0), &buf));
+            ret = set_swi_errno(ts, fstat(arg0, &buf));
             if (ret == (uint32_t)-1)
                 return -1;
             return buf.st_size;
@@ -317,35 +340,43 @@ uint32_t do_arm_semihosting(CPUARMState *env)
         /* XXX: Not implemented.  */
         return -1;
     case TARGET_SYS_REMOVE:
+        GET_ARG(0);
+        GET_ARG(1);
         if (use_gdb_syscalls()) {
-            gdb_do_syscall(arm_semi_cb, "unlink,%s", ARG(0), (int)ARG(1)+1);
+            gdb_do_syscall(arm_semi_cb, "unlink,%s", arg0, (int)arg1+1);
             ret = env->regs[0];
         } else {
-            if (!(s = lock_user_string(ARG(0))))
+            s = lock_user_string(arg0);
+            if (!s) {
                 /* FIXME - should this error code be -TARGET_EFAULT ? */
                 return (uint32_t)-1;
+            }
             ret =  set_swi_errno(ts, remove(s));
-            unlock_user(s, ARG(0), 0);
+            unlock_user(s, arg0, 0);
         }
         return ret;
     case TARGET_SYS_RENAME:
+        GET_ARG(0);
+        GET_ARG(1);
+        GET_ARG(2);
+        GET_ARG(3);
         if (use_gdb_syscalls()) {
             gdb_do_syscall(arm_semi_cb, "rename,%s,%s",
-                           ARG(0), (int)ARG(1)+1, ARG(2), (int)ARG(3)+1);
+                           arg0, (int)arg1+1, arg2, (int)arg3+1);
             return env->regs[0];
         } else {
             char *s2;
-            s = lock_user_string(ARG(0));
-            s2 = lock_user_string(ARG(2));
+            s = lock_user_string(arg0);
+            s2 = lock_user_string(arg2);
             if (!s || !s2)
                 /* FIXME - should this error code be -TARGET_EFAULT ? */
                 ret = (uint32_t)-1;
             else
                 ret = set_swi_errno(ts, rename(s, s2));
             if (s2)
-                unlock_user(s2, ARG(2), 0);
+                unlock_user(s2, arg2, 0);
             if (s)
-                unlock_user(s, ARG(0), 0);
+                unlock_user(s, arg0, 0);
             return ret;
         }
     case TARGET_SYS_CLOCK:
@@ -353,15 +384,19 @@ uint32_t do_arm_semihosting(CPUARMState *env)
     case TARGET_SYS_TIME:
         return set_swi_errno(ts, time(NULL));
     case TARGET_SYS_SYSTEM:
+        GET_ARG(0);
+        GET_ARG(1);
         if (use_gdb_syscalls()) {
-            gdb_do_syscall(arm_semi_cb, "system,%s", ARG(0), (int)ARG(1)+1);
+            gdb_do_syscall(arm_semi_cb, "system,%s", arg0, (int)arg1+1);
             return env->regs[0];
         } else {
-            if (!(s = lock_user_string(ARG(0))))
+            s = lock_user_string(arg0);
+            if (!s) {
                 /* FIXME - should this error code be -TARGET_EFAULT ? */
                 return (uint32_t)-1;
+            }
             ret = set_swi_errno(ts, system(s));
-            unlock_user(s, ARG(0), 0);
+            unlock_user(s, arg0, 0);
             return ret;
         }
     case TARGET_SYS_ERRNO:
@@ -375,22 +410,24 @@ uint32_t do_arm_semihosting(CPUARMState *env)
             /* Build a command-line from the original argv.
              *
              * The inputs are:
-             *     * ARG(0), pointer to a buffer of at least the size
-             *               specified in ARG(1).
-             *     * ARG(1), size of the buffer pointed to by ARG(0) in
+             *     * arg0, pointer to a buffer of at least the size
+             *               specified in arg1.
+             *     * arg1, size of the buffer pointed to by arg0 in
              *               bytes.
              *
              * The outputs are:
-             *     * ARG(0), pointer to null-terminated string of the
+             *     * arg0, pointer to null-terminated string of the
              *               command line.
-             *     * ARG(1), length of the string pointed to by ARG(0).
+             *     * arg1, length of the string pointed to by arg0.
              */
 
             char *output_buffer;
-            size_t input_size = ARG(1);
+            size_t input_size;
             size_t output_size;
             int status = 0;
-
+            GET_ARG(0);
+            GET_ARG(1);
+            input_size = arg1;
             /* Compute the size of the output string.  */
 #if !defined(CONFIG_USER_ONLY)
             output_size = strlen(ts->boot_info->kernel_filename)
@@ -414,10 +451,13 @@ uint32_t do_arm_semihosting(CPUARMState *env)
             }
 
             /* Adjust the command-line length.  */
-            SET_ARG(1, output_size - 1);
+            if (SET_ARG(1, output_size - 1)) {
+                /* Couldn't write back to argument block */
+                return -1;
+            }
 
             /* Lock the buffer on the ARM side.  */
-            output_buffer = lock_user(VERIFY_WRITE, ARG(0), output_size, 0);
+            output_buffer = lock_user(VERIFY_WRITE, arg0, output_size, 0);
             if (!output_buffer) {
                 return -1;
             }
@@ -449,7 +489,7 @@ uint32_t do_arm_semihosting(CPUARMState *env)
         out:
 #endif
             /* Unlock the buffer on the ARM side.  */
-            unlock_user(output_buffer, ARG(0), output_size);
+            unlock_user(output_buffer, arg0, output_size);
 
             return status;
         }
@@ -457,6 +497,7 @@ uint32_t do_arm_semihosting(CPUARMState *env)
         {
             uint32_t *ptr;
             uint32_t limit;
+            GET_ARG(0);
 
 #ifdef CONFIG_USER_ONLY
             /* Some C libraries assume the heap immediately follows .bss, so
@@ -477,25 +518,29 @@ uint32_t do_arm_semihosting(CPUARMState *env)
                 ts->heap_limit = limit;
             }
 
-            if (!(ptr = lock_user(VERIFY_WRITE, ARG(0), 16, 0)))
+            ptr = lock_user(VERIFY_WRITE, arg0, 16, 0);
+            if (!ptr) {
                 /* FIXME - should this error code be -TARGET_EFAULT ? */
                 return (uint32_t)-1;
+            }
             ptr[0] = tswap32(ts->heap_base);
             ptr[1] = tswap32(ts->heap_limit);
             ptr[2] = tswap32(ts->stack_base);
             ptr[3] = tswap32(0); /* Stack limit.  */
-            unlock_user(ptr, ARG(0), 16);
+            unlock_user(ptr, arg0, 16);
 #else
             limit = ram_size;
-            if (!(ptr = lock_user(VERIFY_WRITE, ARG(0), 16, 0)))
+            ptr = lock_user(VERIFY_WRITE, arg0, 16, 0);
+            if (!ptr) {
                 /* FIXME - should this error code be -TARGET_EFAULT ? */
                 return (uint32_t)-1;
+            }
             /* TODO: Make this use the limit of the loaded application.  */
             ptr[0] = tswap32(limit / 2);
             ptr[1] = tswap32(limit);
             ptr[2] = tswap32(limit); /* Stack base */
             ptr[3] = tswap32(0); /* Stack limit.  */
-            unlock_user(ptr, ARG(0), 16);
+            unlock_user(ptr, arg0, 16);
 #endif
             return 0;
         }
commit 3b6eda2f57a5b7ed047077b6272c2b5a9e3531ca
Author: Corey Bryant <coreyb at linux.vnet.ibm.com>
Date:   Thu Oct 18 16:41:04 2012 -0400

    osdep: Less restrictive F_SEFL in qemu_dup_flags()
    
    qemu_dup_flags() currently limits the flags that can be set on the
    fcntl() F_SETFL call to those that we currently know can be set with
    fcntl() F_SETFL.  The problem with this is that it will prevent use
    of new flags in the future without a code update.
    
    This patch relaxes the checking and lets fcntl() determine the flags
    it can set.
    
    Signed-off-by: Corey Bryant <coreyb at linux.vnet.ibm.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/osdep.c b/osdep.c
index 3b25297..c822a01 100644
--- a/osdep.c
+++ b/osdep.c
@@ -88,7 +88,6 @@ static int qemu_dup_flags(int fd, int flags)
     int ret;
     int serrno;
     int dup_flags;
-    int setfl_flags;
 
 #ifdef F_DUPFD_CLOEXEC
     ret = fcntl(fd, F_DUPFD_CLOEXEC, 0);
@@ -113,16 +112,7 @@ static int qemu_dup_flags(int fd, int flags)
     }
 
     /* Set/unset flags that we can with fcntl */
-    setfl_flags = O_APPEND | O_ASYNC | O_NONBLOCK;
-#ifdef O_NOATIME
-    setfl_flags |= O_NOATIME;
-#endif
-#ifdef O_DIRECT
-    setfl_flags |= O_DIRECT;
-#endif
-    dup_flags &= ~setfl_flags;
-    dup_flags |= (flags & setfl_flags);
-    if (fcntl(ret, F_SETFL, dup_flags) == -1) {
+    if (fcntl(ret, F_SETFL, flags) == -1) {
         goto fail;
     }
 
commit 9dfa9f593045d70572ebfd94f59cd33ec99e847c
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Oct 23 16:39:23 2012 +0200

    qemu-iotests: add testcases for mirroring on-source-error/on-target-error
    
    The new options are tested with blkdebug on both the source and the
    target.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
index fd9760a..c6eb851 100755
--- a/tests/qemu-iotests/041
+++ b/tests/qemu-iotests/041
@@ -292,6 +292,259 @@ class TestMirrorNoBacking(ImageMirroringTestCase):
         self.assertTrue(self.compare_images(test_img, target_img),
                         'target image does not match source after mirroring')
 
+class TestReadErrors(ImageMirroringTestCase):
+    image_len = 2 * 1024 * 1024 # MB
+
+    # this should be a multiple of twice the default granularity
+    # so that we hit this offset first in state 1
+    MIRROR_GRANULARITY = 1024 * 1024
+
+    def create_blkdebug_file(self, name, event, errno):
+        file = open(name, 'w')
+        file.write('''
+[inject-error]
+state = "1"
+event = "%s"
+errno = "%d"
+immediately = "off"
+once = "on"
+sector = "%d"
+
+[set-state]
+state = "1"
+event = "%s"
+new_state = "2"
+
+[set-state]
+state = "2"
+event = "%s"
+new_state = "1"
+''' % (event, errno, self.MIRROR_GRANULARITY / 512, event, event))
+        file.close()
+
+    def setUp(self):
+        self.blkdebug_file = backing_img + ".blkdebug"
+        self.create_image(backing_img, TestReadErrors.image_len)
+        self.create_blkdebug_file(self.blkdebug_file, "read_aio", 5)
+        qemu_img('create', '-f', iotests.imgfmt,
+                 '-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw'
+                       % (self.blkdebug_file, backing_img),
+                 test_img)
+        self.vm = iotests.VM().add_drive(test_img)
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(test_img)
+        os.remove(backing_img)
+        os.remove(self.blkdebug_file)
+
+    def test_report_read(self):
+        self.assert_no_active_mirrors()
+
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             target=target_img)
+        self.assert_qmp(result, 'return', {})
+
+        completed = False
+        error = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_ERROR':
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/operation', 'read')
+                    error = True
+                elif event['event'] == 'BLOCK_JOB_READY':
+                    self.assertTrue(False, 'job completed unexpectedly')
+                elif event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assertTrue(error, 'job completed unexpectedly')
+                    self.assert_qmp(event, 'data/type', 'mirror')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/error', 'Input/output error')
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_mirrors()
+        self.vm.shutdown()
+
+    def test_ignore_read(self):
+        self.assert_no_active_mirrors()
+
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             target=target_img, on_source_error='ignore')
+        self.assert_qmp(result, 'return', {})
+
+        event = self.vm.get_qmp_event(wait=True)
+        self.assertEquals(event['event'], 'BLOCK_JOB_ERROR')
+        self.assert_qmp(event, 'data/device', 'drive0')
+        self.assert_qmp(event, 'data/operation', 'read')
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return[0]/paused', False)
+        self.complete_and_wait()
+        self.vm.shutdown()
+
+    def test_stop_read(self):
+        self.assert_no_active_mirrors()
+
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             target=target_img, on_source_error='stop')
+        self.assert_qmp(result, 'return', {})
+
+        error = False
+        ready = False
+        while not ready:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_ERROR':
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/operation', 'read')
+
+                    result = self.vm.qmp('query-block-jobs')
+                    self.assert_qmp(result, 'return[0]/paused', True)
+                    self.assert_qmp(result, 'return[0]/io-status', 'failed')
+
+                    result = self.vm.qmp('block-job-resume', device='drive0')
+                    self.assert_qmp(result, 'return', {})
+                    error = True
+                elif event['event'] == 'BLOCK_JOB_READY':
+                    self.assertTrue(error, 'job completed unexpectedly')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    ready = True
+
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return[0]/paused', False)
+        self.assert_qmp(result, 'return[0]/io-status', 'ok')
+
+        self.complete_and_wait(wait_ready=False)
+        self.assert_no_active_mirrors()
+        self.vm.shutdown()
+
+class TestWriteErrors(ImageMirroringTestCase):
+    image_len = 2 * 1024 * 1024 # MB
+
+    # this should be a multiple of twice the default granularity
+    # so that we hit this offset first in state 1
+    MIRROR_GRANULARITY = 1024 * 1024
+
+    def create_blkdebug_file(self, name, event, errno):
+        file = open(name, 'w')
+        file.write('''
+[inject-error]
+state = "1"
+event = "%s"
+errno = "%d"
+immediately = "off"
+once = "on"
+sector = "%d"
+
+[set-state]
+state = "1"
+event = "%s"
+new_state = "2"
+
+[set-state]
+state = "2"
+event = "%s"
+new_state = "1"
+''' % (event, errno, self.MIRROR_GRANULARITY / 512, event, event))
+        file.close()
+
+    def setUp(self):
+        self.blkdebug_file = target_img + ".blkdebug"
+        self.create_image(backing_img, TestWriteErrors.image_len)
+        self.create_blkdebug_file(self.blkdebug_file, "write_aio", 5)
+        qemu_img('create', '-f', iotests.imgfmt, '-obacking_file=%s' %(backing_img), test_img)
+        self.vm = iotests.VM().add_drive(test_img)
+        self.target_img = 'blkdebug:%s:%s' % (self.blkdebug_file, target_img)
+        qemu_img('create', '-f', iotests.imgfmt, '-osize=%d' %(TestWriteErrors.image_len), target_img)
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(test_img)
+        os.remove(backing_img)
+        os.remove(self.blkdebug_file)
+
+    def test_report_write(self):
+        self.assert_no_active_mirrors()
+
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             mode='existing', target=self.target_img)
+        self.assert_qmp(result, 'return', {})
+
+        completed = False
+        error = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_ERROR':
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/operation', 'write')
+                    error = True
+                elif event['event'] == 'BLOCK_JOB_READY':
+                    self.assertTrue(False, 'job completed unexpectedly')
+                elif event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assertTrue(error, 'job completed unexpectedly')
+                    self.assert_qmp(event, 'data/type', 'mirror')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/error', 'Input/output error')
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_mirrors()
+        self.vm.shutdown()
+
+    def test_ignore_write(self):
+        self.assert_no_active_mirrors()
+
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             mode='existing', target=self.target_img,
+                             on_target_error='ignore')
+        self.assert_qmp(result, 'return', {})
+
+        event = self.vm.get_qmp_event(wait=True)
+        self.assertEquals(event['event'], 'BLOCK_JOB_ERROR')
+        self.assert_qmp(event, 'data/device', 'drive0')
+        self.assert_qmp(event, 'data/operation', 'write')
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return[0]/paused', False)
+        self.complete_and_wait()
+        self.vm.shutdown()
+
+    def test_stop_write(self):
+        self.assert_no_active_mirrors()
+
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             mode='existing', target=self.target_img,
+                             on_target_error='stop')
+        self.assert_qmp(result, 'return', {})
+
+        error = False
+        ready = False
+        while not ready:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_ERROR':
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/operation', 'write')
+
+                    result = self.vm.qmp('query-block-jobs')
+                    self.assert_qmp(result, 'return[0]/paused', True)
+                    self.assert_qmp(result, 'return[0]/io-status', 'failed')
+
+                    result = self.vm.qmp('block-job-resume', device='drive0')
+                    self.assert_qmp(result, 'return', {})
+
+                    result = self.vm.qmp('query-block-jobs')
+                    self.assert_qmp(result, 'return[0]/paused', False)
+                    self.assert_qmp(result, 'return[0]/io-status', 'ok')
+                    error = True
+                elif event['event'] == 'BLOCK_JOB_READY':
+                    self.assertTrue(error, 'job completed unexpectedly')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    ready = True
+
+        self.complete_and_wait(wait_ready=False)
+        self.assert_no_active_mirrors()
+        self.vm.shutdown()
+
 class TestSetSpeed(ImageMirroringTestCase):
     image_len = 80 * 1024 * 1024 # MB
 
diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out
index 281b69e..71009c2 100644
--- a/tests/qemu-iotests/041.out
+++ b/tests/qemu-iotests/041.out
@@ -1,5 +1,5 @@
-............
+..................
 ----------------------------------------------------------------------
-Ran 12 tests
+Ran 18 tests
 
 OK
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 3c60b2d..735c674 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -106,6 +106,10 @@ class VM(object):
 
         return self._qmp.cmd(cmd, args=qmp_args)
 
+    def get_qmp_event(self, wait=False):
+        '''Poll for one queued QMP events and return it'''
+        return self._qmp.pull_event(wait=wait)
+
     def get_qmp_events(self, wait=False):
         '''Poll for queued QMP events and return a list of dicts'''
         events = self._qmp.get_events(wait=wait)
commit 9eb80eadd498fae2ad9eecbb89646b0cc4929cac
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Oct 18 16:49:29 2012 +0200

    qmp: add pull_event function
    
    This function is unlike get_events in that it makes it easy to process
    one event at a time.  This is useful in the mirroring test cases, where
    we want to process just one event (BLOCK_JOB_ERROR) and leave the others
    to a helper function.
    
    Acked-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/QMP/qmp.py b/QMP/qmp.py
index 32510a1..c551df1 100644
--- a/QMP/qmp.py
+++ b/QMP/qmp.py
@@ -136,6 +136,26 @@ class QEMUMonitorProtocol:
             raise Exception(ret['error']['desc'])
         return ret['return']
 
+    def pull_event(self, wait=False):
+        """
+        Get and delete the first available QMP event.
+
+        @param wait: block until an event is available (bool)
+        """
+        self.__sock.setblocking(0)
+        try:
+            self.__json_read()
+        except socket.error, err:
+            if err[0] == errno.EAGAIN:
+                # No data available
+                pass
+        self.__sock.setblocking(1)
+        if not self.__events and wait:
+            self.__json_read(only_event=True)
+        event = self.__events[0]
+        del self.__events[0]
+        return event
+
     def get_events(self, wait=False):
         """
         Get a list of available QMP events.
commit b952b5589a36114e06201c0d2e82c293dbad2b1f
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Oct 18 16:49:28 2012 +0200

    mirror: add support for on-source-error/on-target-error
    
    Error management is important for mirroring; otherwise, an error on the
    target (even something as "innocent" as ENOSPC) requires to start again
    with a full copy.  Similar to on_read_error/on_write_error, two separate
    knobs are provided for on_source_error (reads) and on_target_error (writes).
    The default is 'report' for both.
    
    The 'ignore' policy will leave the sector dirty, so that it will be
    retried later.  Thus, it will not cause corruption.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/mirror.c b/block/mirror.c
index 6320f6a..d6618a4 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -32,13 +32,28 @@ typedef struct MirrorBlockJob {
     RateLimit limit;
     BlockDriverState *target;
     MirrorSyncMode mode;
+    BlockdevOnError on_source_error, on_target_error;
     bool synced;
     bool should_complete;
     int64_t sector_num;
     uint8_t *buf;
 } MirrorBlockJob;
 
-static int coroutine_fn mirror_iteration(MirrorBlockJob *s)
+static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
+                                            int error)
+{
+    s->synced = false;
+    if (read) {
+        return block_job_error_action(&s->common, s->common.bs,
+                                      s->on_source_error, true, error);
+    } else {
+        return block_job_error_action(&s->common, s->target,
+                                      s->on_target_error, false, error);
+    }
+}
+
+static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
+                                         BlockErrorAction *p_action)
 {
     BlockDriverState *source = s->common.bs;
     BlockDriverState *target = s->target;
@@ -60,9 +75,21 @@ static int coroutine_fn mirror_iteration(MirrorBlockJob *s)
     trace_mirror_one_iteration(s, s->sector_num, nb_sectors);
     ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov);
     if (ret < 0) {
-        return ret;
+        *p_action = mirror_error_action(s, true, -ret);
+        goto fail;
+    }
+    ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov);
+    if (ret < 0) {
+        *p_action = mirror_error_action(s, false, -ret);
+        s->synced = false;
+        goto fail;
     }
-    return bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov);
+    return 0;
+
+fail:
+    /* Try again later.  */
+    bdrv_set_dirty(source, s->sector_num, nb_sectors);
+    return ret;
 }
 
 static void coroutine_fn mirror_run(void *opaque)
@@ -117,8 +144,9 @@ static void coroutine_fn mirror_run(void *opaque)
 
         cnt = bdrv_get_dirty_count(bs);
         if (cnt != 0) {
-            ret = mirror_iteration(s);
-            if (ret < 0) {
+            BlockErrorAction action = BDRV_ACTION_REPORT;
+            ret = mirror_iteration(s, &action);
+            if (ret < 0 && action == BDRV_ACTION_REPORT) {
                 goto immediate_exit;
             }
             cnt = bdrv_get_dirty_count(bs);
@@ -129,23 +157,25 @@ static void coroutine_fn mirror_run(void *opaque)
             trace_mirror_before_flush(s);
             ret = bdrv_flush(s->target);
             if (ret < 0) {
-                goto immediate_exit;
-            }
-
-            /* We're out of the streaming phase.  From now on, if the job
-             * is cancelled we will actually complete all pending I/O and
-             * report completion.  This way, block-job-cancel will leave
-             * the target in a consistent state.
-             */
-            s->common.offset = end * BDRV_SECTOR_SIZE;
-            if (!s->synced) {
-                block_job_ready(&s->common);
-                s->synced = true;
+                if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) {
+                    goto immediate_exit;
+                }
+            } else {
+                /* We're out of the streaming phase.  From now on, if the job
+                 * is cancelled we will actually complete all pending I/O and
+                 * report completion.  This way, block-job-cancel will leave
+                 * the target in a consistent state.
+                 */
+                s->common.offset = end * BDRV_SECTOR_SIZE;
+                if (!s->synced) {
+                    block_job_ready(&s->common);
+                    s->synced = true;
+                }
+
+                should_complete = s->should_complete ||
+                    block_job_is_cancelled(&s->common);
+                cnt = bdrv_get_dirty_count(bs);
             }
-
-            should_complete = s->should_complete ||
-                block_job_is_cancelled(&s->common);
-            cnt = bdrv_get_dirty_count(bs);
         }
 
         if (cnt == 0 && should_complete) {
@@ -197,6 +227,7 @@ static void coroutine_fn mirror_run(void *opaque)
 immediate_exit:
     g_free(s->buf);
     bdrv_set_dirty_tracking(bs, false);
+    bdrv_iostatus_disable(s->target);
     if (s->should_complete && ret == 0) {
         if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
             bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
@@ -219,6 +250,13 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
     ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
 }
 
+static void mirror_iostatus_reset(BlockJob *job)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+
+    bdrv_iostatus_reset(s->target);
+}
+
 static void mirror_complete(BlockJob *job, Error **errp)
 {
     MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
@@ -245,25 +283,39 @@ static BlockJobType mirror_job_type = {
     .instance_size = sizeof(MirrorBlockJob),
     .job_type      = "mirror",
     .set_speed     = mirror_set_speed,
+    .iostatus_reset= mirror_iostatus_reset,
     .complete      = mirror_complete,
 };
 
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                   int64_t speed, MirrorSyncMode mode,
+                  BlockdevOnError on_source_error,
+                  BlockdevOnError on_target_error,
                   BlockDriverCompletionFunc *cb,
                   void *opaque, Error **errp)
 {
     MirrorBlockJob *s;
 
+    if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
+         on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
+        !bdrv_iostatus_is_enabled(bs)) {
+        error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
+        return;
+    }
+
     s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp);
     if (!s) {
         return;
     }
 
+    s->on_source_error = on_source_error;
+    s->on_target_error = on_target_error;
     s->target = target;
     s->mode = mode;
     bdrv_set_dirty_tracking(bs, true);
     bdrv_set_enable_write_cache(s->target, true);
+    bdrv_set_on_error(s->target, on_target_error, on_target_error);
+    bdrv_iostatus_enable(s->target);
     s->common.co = qemu_coroutine_create(mirror_run);
     trace_mirror_start(bs, s, s->common.co, opaque);
     qemu_coroutine_enter(s->common.co, s);
diff --git a/block_int.h b/block_int.h
index aaa46a8..00204eb 100644
--- a/block_int.h
+++ b/block_int.h
@@ -337,6 +337,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
  * @target: Block device to write to.
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
  * @mode: Whether to collapse all images in the chain to the target.
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
  * @cb: Completion function for the job.
  * @opaque: Opaque pointer value passed to @cb.
  * @errp: Error object.
@@ -348,6 +350,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
  */
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                   int64_t speed, MirrorSyncMode mode,
+                  BlockdevOnError on_source_error,
+                  BlockdevOnError on_target_error,
                   BlockDriverCompletionFunc *cb,
                   void *opaque, Error **errp);
 
diff --git a/blockdev.c b/blockdev.c
index 431c678..a068a4b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1185,7 +1185,10 @@ void qmp_drive_mirror(const char *device, const char *target,
                       bool has_format, const char *format,
                       enum MirrorSyncMode sync,
                       bool has_mode, enum NewImageMode mode,
-                      bool has_speed, int64_t speed, Error **errp)
+                      bool has_speed, int64_t speed,
+                      bool has_on_source_error, BlockdevOnError on_source_error,
+                      bool has_on_target_error, BlockdevOnError on_target_error,
+                      Error **errp)
 {
     BlockDriverInfo bdi;
     BlockDriverState *bs;
@@ -1200,6 +1203,12 @@ void qmp_drive_mirror(const char *device, const char *target,
     if (!has_speed) {
         speed = 0;
     }
+    if (!has_on_source_error) {
+        on_source_error = BLOCKDEV_ON_ERROR_REPORT;
+    }
+    if (!has_on_target_error) {
+        on_target_error = BLOCKDEV_ON_ERROR_REPORT;
+    }
     if (!has_mode) {
         mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
     }
@@ -1292,7 +1301,8 @@ void qmp_drive_mirror(const char *device, const char *target,
         }
     }
 
-    mirror_start(bs, target_bs, speed, sync, block_job_cb, bs, &local_err);
+    mirror_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
+                 block_job_cb, bs, &local_err);
     if (local_err != NULL) {
         bdrv_delete(target_bs);
         error_propagate(errp, local_err);
diff --git a/hmp.c b/hmp.c
index e530253..4a458ac 100644
--- a/hmp.c
+++ b/hmp.c
@@ -795,7 +795,8 @@ void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
 
     qmp_drive_mirror(device, filename, !!format, format,
                      full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
-                     true, mode, false, 0, &errp);
+                     true, mode, false, 0,
+                     false, 0, false, 0, &errp);
     hmp_handle_error(mon, &errp);
 }
 
diff --git a/qapi-schema.json b/qapi-schema.json
index a066cd5..6aa443e 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1629,6 +1629,14 @@
 #        (all the disk, only the sectors allocated in the topmost image, or
 #        only new I/O).
 #
+# @on-source-error: #optional the action to take on an error on the source,
+#                   default 'report'.  'stop' and 'enospc' can only be used
+#                   if the block device supports io-status (see BlockInfo).
+#
+# @on-target-error: #optional the action to take on an error on the target,
+#                   default 'report' (no limitations, since this applies to
+#                   a different block device than @device).
+#
 # Returns: nothing on success
 #          If @device is not a valid block device, DeviceNotFound
 #
@@ -1637,7 +1645,8 @@
 { 'command': 'drive-mirror',
   'data': { 'device': 'str', 'target': 'str', '*format': 'str',
             'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
-            '*speed': 'int' } }
+            '*speed': 'int', '*on-source-error': 'BlockdevOnError',
+            '*on-target-error': 'BlockdevOnError' } }
 
 ##
 # @migrate_cancel
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 614baea..c31312f 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -937,7 +937,8 @@ EQMP
 
     {
         .name       = "drive-mirror",
-        .args_type  = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?",
+        .args_type  = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?,"
+                      "on-source-error:s?,on-target-error:s?",
         .mhandler.cmd_new = qmp_marshal_input_drive_mirror,
     },
 
@@ -965,6 +966,11 @@ Arguments:
   possibilities include "full" for all the disk, "top" for only the sectors
   allocated in the topmost image, or "none" to only replicate new I/O
   (MirrorSyncMode).
+- "on-source-error": the action to take on an error on the source
+  (BlockdevOnError, default 'report')
+- "on-target-error": the action to take on an error on the target
+  (BlockdevOnError, default 'report')
+
 
 
 Example:
commit 3bd293c3fdc8b4052b9fc357e0b28cba20e73099
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Oct 18 16:49:27 2012 +0200

    iostatus: forward block_job_iostatus_reset to block job
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index ad5e240..1564137 100644
--- a/block.c
+++ b/block.c
@@ -4369,6 +4369,9 @@ void bdrv_iostatus_reset(BlockDriverState *bs)
 {
     if (bdrv_iostatus_is_enabled(bs)) {
         bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+        if (bs->job) {
+            block_job_iostatus_reset(bs->job);
+        }
     }
 }
 
diff --git a/blockjob.c b/blockjob.c
index fbb7e1c..cda12c6 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -142,6 +142,9 @@ bool block_job_is_cancelled(BlockJob *job)
 void block_job_iostatus_reset(BlockJob *job)
 {
     job->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+    if (job->job_type->iostatus_reset) {
+        job->job_type->iostatus_reset(job);
+    }
 }
 
 struct BlockCancelData {
diff --git a/blockjob.h b/blockjob.h
index fb2392e..3792b73 100644
--- a/blockjob.h
+++ b/blockjob.h
@@ -42,6 +42,9 @@ typedef struct BlockJobType {
     /** Optional callback for job types that support setting a speed limit */
     void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
 
+    /** Optional callback for job types that need to forward I/O status reset */
+    void (*iostatus_reset)(BlockJob *job);
+
     /**
      * Optional callback for job types whose completion must be triggered
      * manually.
@@ -253,7 +256,8 @@ int block_job_cancel_sync(BlockJob *job);
  * block_job_iostatus_reset:
  * @job: The job whose I/O status should be reset.
  *
- * Reset I/O status on @job.
+ * Reset I/O status on @job and on BlockDriverState objects it uses,
+ * other than job->bs.
  */
 void block_job_iostatus_reset(BlockJob *job);
 
commit 44c7ca5ebfb20c8086ac78f72a1bc6566828f956
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Oct 23 16:39:22 2012 +0200

    qemu-iotests: add mirroring test case
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041
new file mode 100755
index 0000000..fd9760a
--- /dev/null
+++ b/tests/qemu-iotests/041
@@ -0,0 +1,362 @@
+#!/usr/bin/env python
+#
+# Tests for image mirroring.
+#
+# Copyright (C) 2012 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import time
+import os
+import iotests
+from iotests import qemu_img, qemu_io
+import struct
+
+backing_img = os.path.join(iotests.test_dir, 'backing.img')
+target_backing_img = os.path.join(iotests.test_dir, 'target-backing.img')
+test_img = os.path.join(iotests.test_dir, 'test.img')
+target_img = os.path.join(iotests.test_dir, 'target.img')
+
+class ImageMirroringTestCase(iotests.QMPTestCase):
+    '''Abstract base class for image mirroring test cases'''
+
+    def assert_no_active_mirrors(self):
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return', [])
+
+    def cancel_and_wait(self, drive='drive0', wait_ready=True):
+        '''Cancel a block job and wait for it to finish'''
+        if wait_ready:
+            ready = False
+            while not ready:
+                for event in self.vm.get_qmp_events(wait=True):
+                    if event['event'] == 'BLOCK_JOB_READY':
+                        self.assert_qmp(event, 'data/type', 'mirror')
+                        self.assert_qmp(event, 'data/device', drive)
+                        ready = True
+
+        result = self.vm.qmp('block-job-cancel', device=drive,
+                             force=not wait_ready)
+        self.assert_qmp(result, 'return', {})
+
+        cancelled = False
+        while not cancelled:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_COMPLETED' or \
+                   event['event'] == 'BLOCK_JOB_CANCELLED':
+                    self.assert_qmp(event, 'data/type', 'mirror')
+                    self.assert_qmp(event, 'data/device', drive)
+                    if wait_ready:
+                        self.assertEquals(event['event'], 'BLOCK_JOB_COMPLETED')
+                        self.assert_qmp(event, 'data/offset', self.image_len)
+                        self.assert_qmp(event, 'data/len', self.image_len)
+                    cancelled = True
+
+        self.assert_no_active_mirrors()
+
+    def complete_and_wait(self, drive='drive0', wait_ready=True):
+        '''Complete a block job and wait for it to finish'''
+        if wait_ready:
+            ready = False
+            while not ready:
+                for event in self.vm.get_qmp_events(wait=True):
+                    if event['event'] == 'BLOCK_JOB_READY':
+                        self.assert_qmp(event, 'data/type', 'mirror')
+                        self.assert_qmp(event, 'data/device', drive)
+                        ready = True
+
+        result = self.vm.qmp('block-job-complete', device=drive)
+        self.assert_qmp(result, 'return', {})
+
+        completed = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assert_qmp(event, 'data/type', 'mirror')
+                    self.assert_qmp(event, 'data/device', drive)
+                    self.assert_qmp_absent(event, 'data/error')
+                    self.assert_qmp(event, 'data/offset', self.image_len)
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_mirrors()
+
+    def create_image(self, name, size):
+        file = open(name, 'w')
+        i = 0
+        while i < size:
+            sector = struct.pack('>l504xl', i / 512, i / 512)
+            file.write(sector)
+            i = i + 512
+        file.close()
+
+    def compare_images(self, img1, img2):
+        try:
+            qemu_img('convert', '-f', iotests.imgfmt, '-O', 'raw', img1, img1 + '.raw')
+            qemu_img('convert', '-f', iotests.imgfmt, '-O', 'raw', img2, img2 + '.raw')
+            file1 = open(img1 + '.raw', 'r')
+            file2 = open(img2 + '.raw', 'r')
+            return file1.read() == file2.read()
+        finally:
+            if file1 is not None:
+                file1.close()
+            if file2 is not None:
+                file2.close()
+            try:
+                os.remove(img1 + '.raw')
+            except OSError:
+                pass
+            try:
+                os.remove(img2 + '.raw')
+            except OSError:
+                pass
+
+class TestSingleDrive(ImageMirroringTestCase):
+    image_len = 1 * 1024 * 1024 # MB
+
+    def setUp(self):
+        self.create_image(backing_img, TestSingleDrive.image_len)
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img)
+        self.vm = iotests.VM().add_drive(test_img)
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(test_img)
+        os.remove(backing_img)
+        try:
+            os.remove(target_img)
+        except OSError:
+            pass
+
+    def test_complete(self):
+        self.assert_no_active_mirrors()
+
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             target=target_img)
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait()
+        result = self.vm.qmp('query-block')
+        self.assert_qmp(result, 'return[0]/inserted/file', target_img)
+        self.vm.shutdown()
+        self.assertTrue(self.compare_images(test_img, target_img),
+                        'target image does not match source after mirroring')
+
+    def test_cancel(self):
+        self.assert_no_active_mirrors()
+
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             target=target_img)
+        self.assert_qmp(result, 'return', {})
+
+        self.cancel_and_wait(wait_ready=False)
+        result = self.vm.qmp('query-block')
+        self.assert_qmp(result, 'return[0]/inserted/file', test_img)
+        self.vm.shutdown()
+
+    def test_cancel_after_ready(self):
+        self.assert_no_active_mirrors()
+
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             target=target_img)
+        self.assert_qmp(result, 'return', {})
+
+        self.cancel_and_wait()
+        result = self.vm.qmp('query-block')
+        self.assert_qmp(result, 'return[0]/inserted/file', test_img)
+        self.vm.shutdown()
+        self.assertTrue(self.compare_images(test_img, target_img),
+                        'target image does not match source after mirroring')
+
+    def test_pause(self):
+        self.assert_no_active_mirrors()
+
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             target=target_img)
+        self.assert_qmp(result, 'return', {})
+
+        result = self.vm.qmp('block-job-pause', device='drive0')
+        self.assert_qmp(result, 'return', {})
+
+        time.sleep(1)
+        result = self.vm.qmp('query-block-jobs')
+        offset = self.dictpath(result, 'return[0]/offset')
+
+        time.sleep(1)
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return[0]/offset', offset)
+
+        result = self.vm.qmp('block-job-resume', device='drive0')
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait()
+        self.vm.shutdown()
+        self.assertTrue(self.compare_images(test_img, target_img),
+                        'target image does not match source after mirroring')
+
+    def test_large_cluster(self):
+        self.assert_no_active_mirrors()
+
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'cluster_size=%d,backing_file=%s'
+                        % (TestSingleDrive.image_len, backing_img), target_img)
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             mode='existing', target=target_img)
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait()
+        result = self.vm.qmp('query-block')
+        self.assert_qmp(result, 'return[0]/inserted/file', target_img)
+        self.vm.shutdown()
+        self.assertTrue(self.compare_images(test_img, target_img),
+                        'target image does not match source after mirroring')
+
+    def test_medium_not_found(self):
+        result = self.vm.qmp('drive-mirror', device='ide1-cd0', sync='full',
+                             target=target_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+    def test_image_not_found(self):
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             mode='existing', target=target_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+    def test_device_not_found(self):
+        result = self.vm.qmp('drive-mirror', device='nonexistent', sync='full',
+                             target=target_img)
+        self.assert_qmp(result, 'error/class', 'DeviceNotFound')
+
+class TestMirrorNoBacking(ImageMirroringTestCase):
+    image_len = 2 * 1024 * 1024 # MB
+
+    def complete_and_wait(self, drive='drive0', wait_ready=True):
+        self.create_image(target_backing_img, TestMirrorNoBacking.image_len)
+        return ImageMirroringTestCase.complete_and_wait(self, drive, wait_ready)
+
+    def compare_images(self, img1, img2):
+        self.create_image(target_backing_img, TestMirrorNoBacking.image_len)
+        return ImageMirroringTestCase.compare_images(self, img1, img2)
+
+    def setUp(self):
+        self.create_image(backing_img, TestMirrorNoBacking.image_len)
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img)
+        self.vm = iotests.VM().add_drive(test_img)
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(test_img)
+        os.remove(backing_img)
+        os.remove(target_backing_img)
+        os.remove(target_img)
+
+    def test_complete(self):
+        self.assert_no_active_mirrors()
+
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, target_img)
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             mode='existing', target=target_img)
+        self.assert_qmp(result, 'return', {})
+
+        self.complete_and_wait()
+        result = self.vm.qmp('query-block')
+        self.assert_qmp(result, 'return[0]/inserted/file', target_img)
+        self.vm.shutdown()
+        self.assertTrue(self.compare_images(test_img, target_img),
+                        'target image does not match source after mirroring')
+
+    def test_cancel(self):
+        self.assert_no_active_mirrors()
+
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, target_img)
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             mode='existing', target=target_img)
+        self.assert_qmp(result, 'return', {})
+
+        self.cancel_and_wait()
+        result = self.vm.qmp('query-block')
+        self.assert_qmp(result, 'return[0]/inserted/file', test_img)
+        self.vm.shutdown()
+        self.assertTrue(self.compare_images(test_img, target_img),
+                        'target image does not match source after mirroring')
+
+class TestSetSpeed(ImageMirroringTestCase):
+    image_len = 80 * 1024 * 1024 # MB
+
+    def setUp(self):
+        qemu_img('create', backing_img, str(TestSetSpeed.image_len))
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, test_img)
+        self.vm = iotests.VM().add_drive(test_img)
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(test_img)
+        os.remove(backing_img)
+        os.remove(target_img)
+
+    def test_set_speed(self):
+        self.assert_no_active_mirrors()
+
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             target=target_img)
+        self.assert_qmp(result, 'return', {})
+
+        # Default speed is 0
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return[0]/device', 'drive0')
+        self.assert_qmp(result, 'return[0]/speed', 0)
+
+        result = self.vm.qmp('block-job-set-speed', device='drive0', speed=8 * 1024 * 1024)
+        self.assert_qmp(result, 'return', {})
+
+        # Ensure the speed we set was accepted
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return[0]/device', 'drive0')
+        self.assert_qmp(result, 'return[0]/speed', 8 * 1024 * 1024)
+
+        self.cancel_and_wait()
+
+        # Check setting speed in drive-mirror works
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             target=target_img, speed=4*1024*1024)
+        self.assert_qmp(result, 'return', {})
+
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return[0]/device', 'drive0')
+        self.assert_qmp(result, 'return[0]/speed', 4 * 1024 * 1024)
+
+        self.cancel_and_wait()
+
+    def test_set_speed_invalid(self):
+        self.assert_no_active_mirrors()
+
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             target=target_img, speed=-1)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+        self.assert_no_active_mirrors()
+
+        result = self.vm.qmp('drive-mirror', device='drive0', sync='full',
+                             target=target_img)
+        self.assert_qmp(result, 'return', {})
+
+        result = self.vm.qmp('block-job-set-speed', device='drive0', speed=-1)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+
+        self.cancel_and_wait()
+
+if __name__ == '__main__':
+    iotests.main(supported_fmts=['qcow2', 'qed'])
diff --git a/tests/qemu-iotests/041.out b/tests/qemu-iotests/041.out
new file mode 100644
index 0000000..281b69e
--- /dev/null
+++ b/tests/qemu-iotests/041.out
@@ -0,0 +1,5 @@
+............
+----------------------------------------------------------------------
+Ran 12 tests
+
+OK
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 7407492..ac86f54 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -47,5 +47,6 @@
 038 rw auto backing
 039 rw auto
 040 rw auto
+041 rw auto backing
 042 rw auto quick
 043 rw auto backing
commit d63ffd87acad618a4a64b8812b64ad88577ae9b1
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Oct 18 16:49:25 2012 +0200

    mirror: implement completion
    
    Switching to the target of the migration is done mostly asynchronously,
    and reported to management via the BLOCK_JOB_COMPLETED event; the only
    synchronous phase is opening the backing files.  bdrv_open_backing_file
    can always be done, even for migration of the full image (aka sync:
    'full').  In this case, qmp_drive_mirror will create the target disk
    with no backing file at all, and bdrv_open_backing_file will be a no-op.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/mirror.c b/block/mirror.c
index b353798..6320f6a 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -32,6 +32,8 @@ typedef struct MirrorBlockJob {
     RateLimit limit;
     BlockDriverState *target;
     MirrorSyncMode mode;
+    bool synced;
+    bool should_complete;
     int64_t sector_num;
     uint8_t *buf;
 } MirrorBlockJob;
@@ -70,7 +72,6 @@ static void coroutine_fn mirror_run(void *opaque)
     int64_t sector_num, end;
     int ret = 0;
     int n;
-    bool synced = false;
 
     if (block_job_is_cancelled(&s->common)) {
         goto immediate_exit;
@@ -136,9 +137,14 @@ static void coroutine_fn mirror_run(void *opaque)
              * report completion.  This way, block-job-cancel will leave
              * the target in a consistent state.
              */
-            synced = true;
             s->common.offset = end * BDRV_SECTOR_SIZE;
-            should_complete = block_job_is_cancelled(&s->common);
+            if (!s->synced) {
+                block_job_ready(&s->common);
+                s->synced = true;
+            }
+
+            should_complete = s->should_complete ||
+                block_job_is_cancelled(&s->common);
             cnt = bdrv_get_dirty_count(bs);
         }
 
@@ -157,8 +163,8 @@ static void coroutine_fn mirror_run(void *opaque)
         }
 
         ret = 0;
-        trace_mirror_before_sleep(s, cnt, synced);
-        if (!synced) {
+        trace_mirror_before_sleep(s, cnt, s->synced);
+        if (!s->synced) {
             /* Publish progress */
             s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE;
 
@@ -191,6 +197,12 @@ static void coroutine_fn mirror_run(void *opaque)
 immediate_exit:
     g_free(s->buf);
     bdrv_set_dirty_tracking(bs, false);
+    if (s->should_complete && ret == 0) {
+        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
+            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
+        }
+        bdrv_swap(s->target, s->common.bs);
+    }
     bdrv_close(s->target);
     bdrv_delete(s->target);
     block_job_completed(&s->common, ret);
@@ -207,10 +219,33 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
     ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
 }
 
+static void mirror_complete(BlockJob *job, Error **errp)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+    int ret;
+
+    ret = bdrv_open_backing_file(s->target);
+    if (ret < 0) {
+        char backing_filename[PATH_MAX];
+        bdrv_get_full_backing_filename(s->target, backing_filename,
+                                       sizeof(backing_filename));
+        error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename);
+        return;
+    }
+    if (!s->synced) {
+        error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
+        return;
+    }
+
+    s->should_complete = true;
+    block_job_resume(job);
+}
+
 static BlockJobType mirror_job_type = {
     .instance_size = sizeof(MirrorBlockJob),
     .job_type      = "mirror",
     .set_speed     = mirror_set_speed,
+    .complete      = mirror_complete,
 };
 
 void mirror_start(BlockDriverState *bs, BlockDriverState *target,
commit d9b902db3fb71fdcdfbb2aa9cc94a410dee864e5
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Oct 18 16:49:24 2012 +0200

    qmp: add drive-mirror command
    
    This adds the monitor commands that start the mirroring job.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/blockdev.c b/blockdev.c
index 248d5f6..431c678 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1181,6 +1181,130 @@ void qmp_block_commit(const char *device,
     drive_get_ref(drive_get_by_blockdev(bs));
 }
 
+void qmp_drive_mirror(const char *device, const char *target,
+                      bool has_format, const char *format,
+                      enum MirrorSyncMode sync,
+                      bool has_mode, enum NewImageMode mode,
+                      bool has_speed, int64_t speed, Error **errp)
+{
+    BlockDriverInfo bdi;
+    BlockDriverState *bs;
+    BlockDriverState *source, *target_bs;
+    BlockDriver *proto_drv;
+    BlockDriver *drv = NULL;
+    Error *local_err = NULL;
+    int flags;
+    uint64_t size;
+    int ret;
+
+    if (!has_speed) {
+        speed = 0;
+    }
+    if (!has_mode) {
+        mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
+    }
+
+    bs = bdrv_find(device);
+    if (!bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+        return;
+    }
+
+    if (!bdrv_is_inserted(bs)) {
+        error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
+        return;
+    }
+
+    if (!has_format) {
+        format = mode == NEW_IMAGE_MODE_EXISTING ? NULL : bs->drv->format_name;
+    }
+    if (format) {
+        drv = bdrv_find_format(format);
+        if (!drv) {
+            error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
+            return;
+        }
+    }
+
+    if (bdrv_in_use(bs)) {
+        error_set(errp, QERR_DEVICE_IN_USE, device);
+        return;
+    }
+
+    flags = bs->open_flags | BDRV_O_RDWR;
+    source = bs->backing_hd;
+    if (!source && sync == MIRROR_SYNC_MODE_TOP) {
+        sync = MIRROR_SYNC_MODE_FULL;
+    }
+
+    proto_drv = bdrv_find_protocol(target);
+    if (!proto_drv) {
+        error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
+        return;
+    }
+
+    if (sync == MIRROR_SYNC_MODE_FULL && mode != NEW_IMAGE_MODE_EXISTING) {
+        /* create new image w/o backing file */
+        assert(format && drv);
+        bdrv_get_geometry(bs, &size);
+        size *= 512;
+        ret = bdrv_img_create(target, format,
+                              NULL, NULL, NULL, size, flags);
+    } else {
+        switch (mode) {
+        case NEW_IMAGE_MODE_EXISTING:
+            ret = 0;
+            break;
+        case NEW_IMAGE_MODE_ABSOLUTE_PATHS:
+            /* create new image with backing file */
+            ret = bdrv_img_create(target, format,
+                                  source->filename,
+                                  source->drv->format_name,
+                                  NULL, -1, flags);
+            break;
+        default:
+            abort();
+        }
+    }
+
+    if (ret) {
+        error_set(errp, QERR_OPEN_FILE_FAILED, target);
+        return;
+    }
+
+    target_bs = bdrv_new("");
+    ret = bdrv_open(target_bs, target, flags | BDRV_O_NO_BACKING, drv);
+
+    if (ret < 0) {
+        bdrv_delete(target_bs);
+        error_set(errp, QERR_OPEN_FILE_FAILED, target);
+        return;
+    }
+
+    /* We need a backing file if we will copy parts of a cluster.  */
+    if (bdrv_get_info(target_bs, &bdi) >= 0 && bdi.cluster_size != 0 &&
+        bdi.cluster_size >= BDRV_SECTORS_PER_DIRTY_CHUNK * 512) {
+        ret = bdrv_open_backing_file(target_bs);
+        if (ret < 0) {
+            bdrv_delete(target_bs);
+            error_set(errp, QERR_OPEN_FILE_FAILED, target);
+            return;
+        }
+    }
+
+    mirror_start(bs, target_bs, speed, sync, block_job_cb, bs, &local_err);
+    if (local_err != NULL) {
+        bdrv_delete(target_bs);
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    /* Grab a reference so hotplug does not delete the BlockDriverState from
+     * underneath us.
+     */
+    drive_get_ref(drive_get_by_blockdev(bs));
+}
+
 static BlockJob *find_block_job(const char *device)
 {
     BlockDriverState *bs;
diff --git a/hmp-commands.hx b/hmp-commands.hx
index fb2f237..f916385 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1004,6 +1004,27 @@ Snapshot device, using snapshot file as target if provided
 ETEXI
 
     {
+        .name       = "drive_mirror",
+        .args_type  = "reuse:-n,full:-f,device:B,target:s,format:s?",
+        .params     = "[-n] [-f] device target [format]",
+        .help       = "initiates live storage\n\t\t\t"
+                      "migration for a device. The device's contents are\n\t\t\t"
+                      "copied to the new image file, including data that\n\t\t\t"
+                      "is written after the command is started.\n\t\t\t"
+                      "The -n flag requests QEMU to reuse the image found\n\t\t\t"
+                      "in new-image-file, instead of recreating it from scratch.\n\t\t\t"
+                      "The -f flag requests QEMU to copy the whole disk,\n\t\t\t"
+                      "so that the result does not need a backing file.\n\t\t\t",
+        .mhandler.cmd = hmp_drive_mirror,
+    },
+STEXI
+ at item drive_mirror
+ at findex drive_mirror
+Start mirroring a block device's writes to a new destination,
+using the specified target.
+ETEXI
+
+    {
         .name       = "drive_add",
         .args_type  = "pci_addr:s,opts:s",
         .params     = "[[<domain>:]<bus>:]<slot>\n"
diff --git a/hmp.c b/hmp.c
index 574517a..e530253 100644
--- a/hmp.c
+++ b/hmp.c
@@ -771,6 +771,34 @@ void hmp_block_resize(Monitor *mon, const QDict *qdict)
     hmp_handle_error(mon, &errp);
 }
 
+void hmp_drive_mirror(Monitor *mon, const QDict *qdict)
+{
+    const char *device = qdict_get_str(qdict, "device");
+    const char *filename = qdict_get_str(qdict, "target");
+    const char *format = qdict_get_try_str(qdict, "format");
+    int reuse = qdict_get_try_bool(qdict, "reuse", 0);
+    int full = qdict_get_try_bool(qdict, "full", 0);
+    enum NewImageMode mode;
+    Error *errp = NULL;
+
+    if (!filename) {
+        error_set(&errp, QERR_MISSING_PARAMETER, "target");
+        hmp_handle_error(mon, &errp);
+        return;
+    }
+
+    if (reuse) {
+        mode = NEW_IMAGE_MODE_EXISTING;
+    } else {
+        mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
+    }
+
+    qmp_drive_mirror(device, filename, !!format, format,
+                     full ? MIRROR_SYNC_MODE_FULL : MIRROR_SYNC_MODE_TOP,
+                     true, mode, false, 0, &errp);
+    hmp_handle_error(mon, &errp);
+}
+
 void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict)
 {
     const char *device = qdict_get_str(qdict, "device");
diff --git a/hmp.h b/hmp.h
index 7bdd23c..34eb2b3 100644
--- a/hmp.h
+++ b/hmp.h
@@ -51,6 +51,7 @@ void hmp_block_passwd(Monitor *mon, const QDict *qdict);
 void hmp_balloon(Monitor *mon, const QDict *qdict);
 void hmp_block_resize(Monitor *mon, const QDict *qdict);
 void hmp_snapshot_blkdev(Monitor *mon, const QDict *qdict);
+void hmp_drive_mirror(Monitor *mon, const QDict *qdict);
 void hmp_migrate_cancel(Monitor *mon, const QDict *qdict);
 void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict);
 void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict);
diff --git a/qapi-schema.json b/qapi-schema.json
index 8c4b7c8..a066cd5 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1606,6 +1606,40 @@
   'data': { 'device': 'str', '*base': 'str', 'top': 'str',
             '*speed': 'int' } }
 
+##
+# @drive-mirror
+#
+# Start mirroring a block device's writes to a new destination.
+#
+# @device:  the name of the device whose writes should be mirrored.
+#
+# @target: the target of the new image. If the file exists, or if it
+#          is a device, the existing file/device will be used as the new
+#          destination.  If it does not exist, a new file will be created.
+#
+# @format: #optional the format of the new destination, default is to
+#          probe if @mode is 'existing', else the format of the source
+#
+# @mode: #optional whether and how QEMU should create a new image, default is
+#        'absolute-paths'.
+#
+# @speed:  #optional the maximum speed, in bytes per second
+#
+# @sync: what parts of the disk image should be copied to the destination
+#        (all the disk, only the sectors allocated in the topmost image, or
+#        only new I/O).
+#
+# Returns: nothing on success
+#          If @device is not a valid block device, DeviceNotFound
+#
+# Since 1.3
+##
+{ 'command': 'drive-mirror',
+  'data': { 'device': 'str', 'target': 'str', '*format': 'str',
+            'sync': 'MirrorSyncMode', '*mode': 'NewImageMode',
+            '*speed': 'int' } }
+
+##
 # @migrate_cancel
 #
 # Cancel the current executing migration process.
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 4f9c711..614baea 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -936,6 +936,48 @@ Example:
 EQMP
 
     {
+        .name       = "drive-mirror",
+        .args_type  = "sync:s,device:B,target:s,speed:i?,mode:s?,format:s?",
+        .mhandler.cmd_new = qmp_marshal_input_drive_mirror,
+    },
+
+SQMP
+drive-mirror
+------------
+
+Start mirroring a block device's writes to a new destination. target
+specifies the target of the new image. If the file exists, or if it is
+a device, it will be used as the new destination for writes. If it does not
+exist, a new file will be created. format specifies the format of the
+mirror image, default is to probe if mode='existing', else the format
+of the source.
+
+Arguments:
+
+- "device": device name to operate on (json-string)
+- "target": name of new image file (json-string)
+- "format": format of new image (json-string, optional)
+- "mode": how an image file should be created into the target
+  file/device (NewImageMode, optional, default 'absolute-paths')
+- "speed": maximum speed of the streaming job, in bytes per second
+  (json-int)
+- "sync": what parts of the disk image should be copied to the destination;
+  possibilities include "full" for all the disk, "top" for only the sectors
+  allocated in the topmost image, or "none" to only replicate new I/O
+  (MirrorSyncMode).
+
+
+Example:
+
+-> { "execute": "drive-mirror", "arguments": { "device": "ide-hd0",
+                                               "target": "/some/place/my-image",
+                                               "sync": "full",
+                                               "format": "qcow2" } }
+<- { "return": {} }
+
+EQMP
+
+    {
         .name       = "balloon",
         .args_type  = "value:M",
         .mhandler.cmd_new = qmp_marshal_input_balloon,
commit 893f7ebafe4e8afc0ce4dbd9e64b3752f3036bbb
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Oct 18 16:49:23 2012 +0200

    mirror: introduce mirror job
    
    This patch adds the implementation of a new job that mirrors a disk to
    a new image while letting the guest continue using the old image.
    The target is treated as a "black box" and data is copied from the
    source to the target in the background.  This can be used for several
    purposes, including storage migration, continuous replication, and
    observation of the guest I/O in an external program.  It is also a
    first step in replacing the inefficient block migration code that is
    part of QEMU.
    
    The job is possibly never-ending, but it is logically structured into
    two phases: 1) copy all data as fast as possible until the target
    first gets in sync with the source; 2) keep target in sync and
    ensure that reopening to the target gets a correct (full) copy
    of the source data.
    
    The second phase is indicated by the progress in "info block-jobs"
    reporting the current offset to be equal to the length of the file.
    When the job is cancelled in the second phase, QEMU will run the
    job until the source is clean and quiescent, then it will report
    successful completion of the job.
    
    In other words, the BLOCK_JOB_CANCELLED event means that the target
    may _not_ be consistent with a past state of the source; the
    BLOCK_JOB_COMPLETED event means that the target is consistent with
    a past state of the source.  (Note that it could already happen
    that management lost the race against QEMU and got a completion
    event instead of cancellation).
    
    It is not yet possible to complete the job and switch over to the target
    disk.  The next patches will fix this and add many refinements to the
    basic idea introduced here.  These include improved error management,
    some tunable knobs and performance optimizations.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 554f429..806e526 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -12,3 +12,4 @@ block-obj-$(CONFIG_GLUSTERFS) += gluster.o
 
 common-obj-y += stream.o
 common-obj-y += commit.o
+common-obj-y += mirror.o
diff --git a/block/mirror.c b/block/mirror.c
new file mode 100644
index 0000000..b353798
--- /dev/null
+++ b/block/mirror.c
@@ -0,0 +1,235 @@
+/*
+ * Image mirroring
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ *  Paolo Bonzini  <pbonzini at redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "blockjob.h"
+#include "block_int.h"
+#include "qemu/ratelimit.h"
+
+enum {
+    /*
+     * Size of data buffer for populating the image file.  This should be large
+     * enough to process multiple clusters in a single call, so that populating
+     * contiguous regions of the image is efficient.
+     */
+    BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */
+};
+
+#define SLICE_TIME 100000000ULL /* ns */
+
+typedef struct MirrorBlockJob {
+    BlockJob common;
+    RateLimit limit;
+    BlockDriverState *target;
+    MirrorSyncMode mode;
+    int64_t sector_num;
+    uint8_t *buf;
+} MirrorBlockJob;
+
+static int coroutine_fn mirror_iteration(MirrorBlockJob *s)
+{
+    BlockDriverState *source = s->common.bs;
+    BlockDriverState *target = s->target;
+    QEMUIOVector qiov;
+    int ret, nb_sectors;
+    int64_t end;
+    struct iovec iov;
+
+    end = s->common.len >> BDRV_SECTOR_BITS;
+    s->sector_num = bdrv_get_next_dirty(source, s->sector_num);
+    nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num);
+    bdrv_reset_dirty(source, s->sector_num, nb_sectors);
+
+    /* Copy the dirty cluster.  */
+    iov.iov_base = s->buf;
+    iov.iov_len  = nb_sectors * 512;
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    trace_mirror_one_iteration(s, s->sector_num, nb_sectors);
+    ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov);
+    if (ret < 0) {
+        return ret;
+    }
+    return bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov);
+}
+
+static void coroutine_fn mirror_run(void *opaque)
+{
+    MirrorBlockJob *s = opaque;
+    BlockDriverState *bs = s->common.bs;
+    int64_t sector_num, end;
+    int ret = 0;
+    int n;
+    bool synced = false;
+
+    if (block_job_is_cancelled(&s->common)) {
+        goto immediate_exit;
+    }
+
+    s->common.len = bdrv_getlength(bs);
+    if (s->common.len < 0) {
+        block_job_completed(&s->common, s->common.len);
+        return;
+    }
+
+    end = s->common.len >> BDRV_SECTOR_BITS;
+    s->buf = qemu_blockalign(bs, BLOCK_SIZE);
+
+    if (s->mode != MIRROR_SYNC_MODE_NONE) {
+        /* First part, loop on the sectors and initialize the dirty bitmap.  */
+        BlockDriverState *base;
+        base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
+        for (sector_num = 0; sector_num < end; ) {
+            int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
+            ret = bdrv_co_is_allocated_above(bs, base,
+                                             sector_num, next - sector_num, &n);
+
+            if (ret < 0) {
+                goto immediate_exit;
+            }
+
+            assert(n > 0);
+            if (ret == 1) {
+                bdrv_set_dirty(bs, sector_num, n);
+                sector_num = next;
+            } else {
+                sector_num += n;
+            }
+        }
+    }
+
+    s->sector_num = -1;
+    for (;;) {
+        uint64_t delay_ns;
+        int64_t cnt;
+        bool should_complete;
+
+        cnt = bdrv_get_dirty_count(bs);
+        if (cnt != 0) {
+            ret = mirror_iteration(s);
+            if (ret < 0) {
+                goto immediate_exit;
+            }
+            cnt = bdrv_get_dirty_count(bs);
+        }
+
+        should_complete = false;
+        if (cnt == 0) {
+            trace_mirror_before_flush(s);
+            ret = bdrv_flush(s->target);
+            if (ret < 0) {
+                goto immediate_exit;
+            }
+
+            /* We're out of the streaming phase.  From now on, if the job
+             * is cancelled we will actually complete all pending I/O and
+             * report completion.  This way, block-job-cancel will leave
+             * the target in a consistent state.
+             */
+            synced = true;
+            s->common.offset = end * BDRV_SECTOR_SIZE;
+            should_complete = block_job_is_cancelled(&s->common);
+            cnt = bdrv_get_dirty_count(bs);
+        }
+
+        if (cnt == 0 && should_complete) {
+            /* The dirty bitmap is not updated while operations are pending.
+             * If we're about to exit, wait for pending operations before
+             * calling bdrv_get_dirty_count(bs), or we may exit while the
+             * source has dirty data to copy!
+             *
+             * Note that I/O can be submitted by the guest while
+             * mirror_populate runs.
+             */
+            trace_mirror_before_drain(s, cnt);
+            bdrv_drain_all();
+            cnt = bdrv_get_dirty_count(bs);
+        }
+
+        ret = 0;
+        trace_mirror_before_sleep(s, cnt, synced);
+        if (!synced) {
+            /* Publish progress */
+            s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE;
+
+            if (s->common.speed) {
+                delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK);
+            } else {
+                delay_ns = 0;
+            }
+
+            /* Note that even when no rate limit is applied we need to yield
+             * with no pending I/O here so that qemu_aio_flush() returns.
+             */
+            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
+            if (block_job_is_cancelled(&s->common)) {
+                break;
+            }
+        } else if (!should_complete) {
+            delay_ns = (cnt == 0 ? SLICE_TIME : 0);
+            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
+        } else if (cnt == 0) {
+            /* The two disks are in sync.  Exit and report successful
+             * completion.
+             */
+            assert(QLIST_EMPTY(&bs->tracked_requests));
+            s->common.cancelled = false;
+            break;
+        }
+    }
+
+immediate_exit:
+    g_free(s->buf);
+    bdrv_set_dirty_tracking(bs, false);
+    bdrv_close(s->target);
+    bdrv_delete(s->target);
+    block_job_completed(&s->common, ret);
+}
+
+static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+
+    if (speed < 0) {
+        error_set(errp, QERR_INVALID_PARAMETER, "speed");
+        return;
+    }
+    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
+}
+
+static BlockJobType mirror_job_type = {
+    .instance_size = sizeof(MirrorBlockJob),
+    .job_type      = "mirror",
+    .set_speed     = mirror_set_speed,
+};
+
+void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+                  int64_t speed, MirrorSyncMode mode,
+                  BlockDriverCompletionFunc *cb,
+                  void *opaque, Error **errp)
+{
+    MirrorBlockJob *s;
+
+    s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp);
+    if (!s) {
+        return;
+    }
+
+    s->target = target;
+    s->mode = mode;
+    bdrv_set_dirty_tracking(bs, true);
+    bdrv_set_enable_write_cache(s->target, true);
+    s->common.co = qemu_coroutine_create(mirror_run);
+    trace_mirror_start(bs, s, s->common.co, opaque);
+    qemu_coroutine_enter(s->common.co, s);
+}
diff --git a/block_int.h b/block_int.h
index f4bae04..aaa46a8 100644
--- a/block_int.h
+++ b/block_int.h
@@ -331,4 +331,24 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
                  BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
                  void *opaque, Error **errp);
 
+/*
+ * mirror_start:
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @mode: Whether to collapse all images in the chain to the target.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Start a mirroring operation on @bs.  Clusters that are allocated
+ * in @bs will be written to @bs until the job is cancelled or
+ * manually completed.  At the end of a successful mirroring job,
+ * @bs will be switched to read from @target.
+ */
+void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+                  int64_t speed, MirrorSyncMode mode,
+                  BlockDriverCompletionFunc *cb,
+                  void *opaque, Error **errp);
+
 #endif /* BLOCK_INT_H */
diff --git a/qapi-schema.json b/qapi-schema.json
index 37bbeca..8c4b7c8 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -1166,6 +1166,23 @@
   'data': ['report', 'ignore', 'enospc', 'stop'] }
 
 ##
+# @MirrorSyncMode:
+#
+# An enumeration of possible behaviors for the initial synchronization
+# phase of storage mirroring.
+#
+# @top: copies data in the topmost image to the destination
+#
+# @full: copies data from all images to the destination
+#
+# @none: only copy data written from now on
+#
+# Since: 1.3
+##
+{ 'enum': 'MirrorSyncMode',
+  'data': ['top', 'full', 'none'] }
+
+##
 # @BlockJobInfo:
 #
 # Information about a long-running block device operation.
diff --git a/trace-events b/trace-events
index 9ab8e27..09b5d55 100644
--- a/trace-events
+++ b/trace-events
@@ -77,6 +77,13 @@ stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base
 commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
 commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "bs %p base %p top %p s %p co %p opaque %p"
 
+# block/mirror.c
+mirror_start(void *bs, void *s, void *co, void *opaque) "bs %p s %p co %p opaque %p"
+mirror_before_flush(void *s) "s %p"
+mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64
+mirror_before_sleep(void *s, int64_t cnt, int synced) "s %p dirty count %"PRId64" synced %d"
+mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d"
+
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
 qmp_block_job_pause(void *job) "job %p"
commit a66a2a368383e627b929bf42d1b972822491404b
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Mon Jul 23 15:15:47 2012 +0200

    block: introduce BLOCK_JOB_READY event
    
    Even for jobs that need to be manually completed, management may want
    to take care itself of the completion, not requiring the user to issue
    a command to terminate the job.  In this case we want to avoid that
    they poll us continuously, waiting for completion to become available.
    Thus, add a new event that signals the phase switch and the availability
    of the block-job-complete command.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/QMP/qmp-events.txt b/QMP/qmp-events.txt
index 987c575..b2698e4 100644
--- a/QMP/qmp-events.txt
+++ b/QMP/qmp-events.txt
@@ -118,6 +118,24 @@ Example:
               "action": "stop" },
     "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
 
+BLOCK_JOB_READY
+---------------
+
+Emitted when a block job is ready to complete.
+
+Data:
+
+- "device": device name (json-string)
+
+Example:
+
+{ "event": "BLOCK_JOB_READY",
+    "data": { "device": "ide0-hd1" },
+    "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
+Note: The "ready to complete" status is always reset by a BLOCK_JOB_ERROR
+event.
+
 DEVICE_TRAY_MOVED
 -----------------
 
diff --git a/blockdev.c b/blockdev.c
index 02d3e0b..248d5f6 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1056,20 +1056,6 @@ void qmp_block_resize(const char *device, int64_t size, Error **errp)
     }
 }
 
-static QObject *qobject_from_block_job(BlockJob *job)
-{
-    return qobject_from_jsonf("{ 'type': %s,"
-                              "'device': %s,"
-                              "'len': %" PRId64 ","
-                              "'offset': %" PRId64 ","
-                              "'speed': %" PRId64 " }",
-                              job->job_type->job_type,
-                              bdrv_get_device_name(job->bs),
-                              job->len,
-                              job->offset,
-                              job->speed);
-}
-
 static void block_job_cb(void *opaque, int ret)
 {
     BlockDriverState *bs = opaque;
diff --git a/blockjob.c b/blockjob.c
index c93a0e0..fbb7e1c 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -225,6 +225,27 @@ static void block_job_iostatus_set_err(BlockJob *job, int error)
 }
 
 
+QObject *qobject_from_block_job(BlockJob *job)
+{
+    return qobject_from_jsonf("{ 'type': %s,"
+                              "'device': %s,"
+                              "'len': %" PRId64 ","
+                              "'offset': %" PRId64 ","
+                              "'speed': %" PRId64 " }",
+                              job->job_type->job_type,
+                              bdrv_get_device_name(job->bs),
+                              job->len,
+                              job->offset,
+                              job->speed);
+}
+
+void block_job_ready(BlockJob *job)
+{
+    QObject *data = qobject_from_block_job(job);
+    monitor_protocol_event(QEVENT_BLOCK_JOB_READY, data);
+    qobject_decref(data);
+}
+
 BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
                                         BlockdevOnError on_err,
                                         int is_read, int error)
diff --git a/blockjob.h b/blockjob.h
index c44e2ea..fb2392e 100644
--- a/blockjob.h
+++ b/blockjob.h
@@ -211,6 +211,22 @@ void block_job_pause(BlockJob *job);
 void block_job_resume(BlockJob *job);
 
 /**
+ * qobject_from_block_job:
+ * @job: The job whose information is requested.
+ *
+ * Return a QDict corresponding to @job's query-block-jobs entry.
+ */
+QObject *qobject_from_block_job(BlockJob *job);
+
+/**
+ * block_job_ready:
+ * @job: The job which is now ready to complete.
+ *
+ * Send a BLOCK_JOB_READY event for the specified job.
+ */
+void block_job_ready(BlockJob *job);
+
+/**
  * block_job_is_paused:
  * @job: The job being queried.
  *
diff --git a/monitor.c b/monitor.c
index 1d3dd9a..eeef32e 100644
--- a/monitor.c
+++ b/monitor.c
@@ -451,6 +451,7 @@ static const char *monitor_event_names[] = {
     [QEVENT_BLOCK_JOB_COMPLETED] = "BLOCK_JOB_COMPLETED",
     [QEVENT_BLOCK_JOB_CANCELLED] = "BLOCK_JOB_CANCELLED",
     [QEVENT_BLOCK_JOB_ERROR] = "BLOCK_JOB_ERROR",
+    [QEVENT_BLOCK_JOB_READY] = "BLOCK_JOB_READY",
     [QEVENT_DEVICE_TRAY_MOVED] = "DEVICE_TRAY_MOVED",
     [QEVENT_SUSPEND] = "SUSPEND",
     [QEVENT_SUSPEND_DISK] = "SUSPEND_DISK",
diff --git a/monitor.h b/monitor.h
index d4c017e..b4ef955 100644
--- a/monitor.h
+++ b/monitor.h
@@ -39,6 +39,7 @@ typedef enum MonitorEvent {
     QEVENT_BLOCK_JOB_COMPLETED,
     QEVENT_BLOCK_JOB_CANCELLED,
     QEVENT_BLOCK_JOB_ERROR,
+    QEVENT_BLOCK_JOB_READY,
     QEVENT_DEVICE_TRAY_MOVED,
     QEVENT_SUSPEND,
     QEVENT_SUSPEND_DISK,
diff --git a/qapi-schema.json b/qapi-schema.json
index 9482976..37bbeca 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2037,7 +2037,8 @@
 #
 # Manually trigger completion of an active background block operation.  This
 # is supported for drive mirroring, where it also switches the device to
-# write to the target path only.
+# write to the target path only.  The ability to complete is signaled with
+# a BLOCK_JOB_READY event.
 #
 # This command completes an active background block operation synchronously.
 # The ordering of this command's return with the BLOCK_JOB_COMPLETED event
commit aeae883baf2377b714a41529f94905046fa058f3
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Oct 18 16:49:21 2012 +0200

    block: add block-job-complete
    
    While streaming can be dropped as soon as it progressed through the whole
    image, mirroring needs to be completed manually for two reasons: 1) so that
    management knows exactly when the VM switches to the target; 2) because
    for other use cases such as replication, we may leave the operation running
    for the whole life of the virtual machine.
    
    Add a new block job command that manually completes background operations.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/blockdev.c b/blockdev.c
index 46e4bbd..02d3e0b 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1266,6 +1266,19 @@ void qmp_block_job_resume(const char *device, Error **errp)
     block_job_resume(job);
 }
 
+void qmp_block_job_complete(const char *device, Error **errp)
+{
+    BlockJob *job = find_block_job(device);
+
+    if (!job) {
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
+        return;
+    }
+
+    trace_qmp_block_job_complete(job);
+    block_job_complete(job, errp);
+}
+
 static void do_qmp_query_block_jobs_one(void *opaque, BlockDriverState *bs)
 {
     BlockJobInfoList **prev = opaque;
diff --git a/blockjob.c b/blockjob.c
index b5c16f3..c93a0e0 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -99,6 +99,16 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
     job->speed = speed;
 }
 
+void block_job_complete(BlockJob *job, Error **errp)
+{
+    if (job->paused || job->cancelled || !job->job_type->complete) {
+        error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
+        return;
+    }
+
+    job->job_type->complete(job, errp);
+}
+
 void block_job_pause(BlockJob *job)
 {
     job->paused = true;
diff --git a/blockjob.h b/blockjob.h
index c2261a9..c44e2ea 100644
--- a/blockjob.h
+++ b/blockjob.h
@@ -41,6 +41,12 @@ typedef struct BlockJobType {
 
     /** Optional callback for job types that support setting a speed limit */
     void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
+
+    /**
+     * Optional callback for job types whose completion must be triggered
+     * manually.
+     */
+    void (*complete)(BlockJob *job, Error **errp);
 } BlockJobType;
 
 /**
@@ -164,6 +170,15 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
 void block_job_cancel(BlockJob *job);
 
 /**
+ * block_job_complete:
+ * @job: The job to be completed.
+ * @errp: Error object.
+ *
+ * Asynchronously complete the specified job.
+ */
+void block_job_complete(BlockJob *job, Error **errp);
+
+/**
  * block_job_is_cancelled:
  * @job: The job being queried.
  *
diff --git a/hmp-commands.hx b/hmp-commands.hx
index e0b537d..fb2f237 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -109,7 +109,22 @@ ETEXI
 STEXI
 @item block_job_cancel
 @findex block_job_cancel
-Stop an active block streaming operation.
+Stop an active background block operation (streaming, mirroring).
+ETEXI
+
+    {
+        .name       = "block_job_complete",
+        .args_type  = "device:B",
+        .params     = "device",
+        .help       = "stop an active background block operation",
+        .mhandler.cmd = hmp_block_job_complete,
+    },
+
+STEXI
+ at item block_job_complete
+ at findex block_job_complete
+Manually trigger completion of an active background block operation.
+For mirroring, this will switch the device to the destination path.
 ETEXI
 
     {
diff --git a/hmp.c b/hmp.c
index 2b97982..574517a 100644
--- a/hmp.c
+++ b/hmp.c
@@ -990,6 +990,16 @@ void hmp_block_job_resume(Monitor *mon, const QDict *qdict)
     hmp_handle_error(mon, &error);
 }
 
+void hmp_block_job_complete(Monitor *mon, const QDict *qdict)
+{
+    Error *error = NULL;
+    const char *device = qdict_get_str(qdict, "device");
+
+    qmp_block_job_complete(device, &error);
+
+    hmp_handle_error(mon, &error);
+}
+
 typedef struct MigrationStatus
 {
     QEMUTimer *timer;
diff --git a/hmp.h b/hmp.h
index 71ea384..7bdd23c 100644
--- a/hmp.h
+++ b/hmp.h
@@ -66,6 +66,7 @@ void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict);
 void hmp_block_job_cancel(Monitor *mon, const QDict *qdict);
 void hmp_block_job_pause(Monitor *mon, const QDict *qdict);
 void hmp_block_job_resume(Monitor *mon, const QDict *qdict);
+void hmp_block_job_complete(Monitor *mon, const QDict *qdict);
 void hmp_migrate(Monitor *mon, const QDict *qdict);
 void hmp_device_del(Monitor *mon, const QDict *qdict);
 void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict);
diff --git a/qapi-schema.json b/qapi-schema.json
index dfcbb67..9482976 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2033,6 +2033,31 @@
 { 'command': 'block-job-resume', 'data': { 'device': 'str' } }
 
 ##
+# @block-job-complete:
+#
+# Manually trigger completion of an active background block operation.  This
+# is supported for drive mirroring, where it also switches the device to
+# write to the target path only.
+#
+# This command completes an active background block operation synchronously.
+# The ordering of this command's return with the BLOCK_JOB_COMPLETED event
+# is not defined.  Note that if an I/O error occurs during the processing of
+# this command: 1) the command itself will fail; 2) the error will be processed
+# according to the rerror/werror arguments that were specified when starting
+# the operation.
+#
+# A cancelled or paused job cannot be completed.
+#
+# @device: the device name
+#
+# Returns: Nothing on success
+#          If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-complete', 'data': { 'device': 'str' } }
+
+##
 # @ObjectTypeInfo:
 #
 # This structure describes a search result from @qom-list-types
diff --git a/qerror.h b/qerror.h
index c91708c..dacac52 100644
--- a/qerror.h
+++ b/qerror.h
@@ -54,6 +54,9 @@ void assert_no_error(Error *err);
 #define QERR_BLOCK_JOB_PAUSED \
     ERROR_CLASS_GENERIC_ERROR, "The block job for device '%s' is currently paused"
 
+#define QERR_BLOCK_JOB_NOT_READY \
+    ERROR_CLASS_GENERIC_ERROR, "The active block job for device '%s' cannot be completed"
+
 #define QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED \
     ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by device '%s' does not support feature '%s'"
 
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 5ba8c48..4f9c711 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -843,6 +843,11 @@ EQMP
         .mhandler.cmd_new = qmp_marshal_input_block_job_resume,
     },
     {
+        .name       = "block-job-complete",
+        .args_type  = "device:B",
+        .mhandler.cmd_new = qmp_marshal_input_block_job_complete,
+    },
+    {
         .name       = "transaction",
         .args_type  = "actions:q",
         .mhandler.cmd_new = qmp_marshal_input_transaction,
diff --git a/trace-events b/trace-events
index e2d4580..9ab8e27 100644
--- a/trace-events
+++ b/trace-events
@@ -81,6 +81,7 @@ commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "
 qmp_block_job_cancel(void *job) "job %p"
 qmp_block_job_pause(void *job) "job %p"
 qmp_block_job_resume(void *job) "job %p"
+qmp_block_job_complete(void *job) "job %p"
 block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
 qmp_block_stream(void *bs, void *job) "bs %p job %p"
 
commit 65f4632243f526958aa1f6b3911add98329c3796
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Oct 18 16:49:20 2012 +0200

    block: rename block_job_complete to block_job_completed
    
    The imperative will be used for the QMP command.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/commit.c b/block/commit.c
index 13d9e82..fae7958 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -160,7 +160,7 @@ exit_restore_reopen:
         bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
     }
 
-    block_job_complete(&s->common, ret);
+    block_job_completed(&s->common, ret);
 }
 
 static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
diff --git a/block/stream.c b/block/stream.c
index 7926652..0c0fc7a 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -86,7 +86,7 @@ static void coroutine_fn stream_run(void *opaque)
 
     s->common.len = bdrv_getlength(bs);
     if (s->common.len < 0) {
-        block_job_complete(&s->common, s->common.len);
+        block_job_completed(&s->common, s->common.len);
         return;
     }
 
@@ -184,7 +184,7 @@ wait:
     }
 
     qemu_vfree(buf);
-    block_job_complete(&s->common, ret);
+    block_job_completed(&s->common, ret);
 }
 
 static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp)
diff --git a/blockjob.c b/blockjob.c
index f55f55a..b5c16f3 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -71,7 +71,7 @@ void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
     return job;
 }
 
-void block_job_complete(BlockJob *job, int ret)
+void block_job_completed(BlockJob *job, int ret)
 {
     BlockDriverState *bs = job->bs;
 
diff --git a/blockjob.h b/blockjob.h
index 930cc3c..c2261a9 100644
--- a/blockjob.h
+++ b/blockjob.h
@@ -135,14 +135,14 @@ void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
 void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns);
 
 /**
- * block_job_complete:
+ * block_job_completed:
  * @job: The job being completed.
  * @ret: The status code.
  *
  * Call the completion function that was registered at creation time, and
  * free @job.
  */
-void block_job_complete(BlockJob *job, int ret);
+void block_job_completed(BlockJob *job, int ret);
 
 /**
  * block_job_set_speed:
commit b9a9b3a4626aa099f829e2a6036bfaa0c8e47700
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Aug 1 15:23:44 2012 +0200

    block: export dirty bitmap information in query-block
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index 2c3d3da..ad5e240 100644
--- a/block.c
+++ b/block.c
@@ -2837,6 +2837,13 @@ BlockInfo *bdrv_query_info(BlockDriverState *bs)
         info->io_status = bs->iostatus;
     }
 
+    if (bs->dirty_bitmap) {
+        info->has_dirty = true;
+        info->dirty = g_malloc0(sizeof(*info->dirty));
+        info->dirty->count = bdrv_get_dirty_count(bs) *
+            BDRV_SECTORS_PER_DIRTY_CHUNK * BDRV_SECTOR_SIZE;
+    }
+
     if (bs->drv) {
         info->has_inserted = true;
         info->inserted = g_malloc0(sizeof(*info->inserted));
diff --git a/qapi-schema.json b/qapi-schema.json
index 36278cc..dfcbb67 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -661,6 +661,18 @@
 { 'enum': 'BlockDeviceIoStatus', 'data': [ 'ok', 'failed', 'nospace' ] }
 
 ##
+# @BlockDirtyInfo:
+#
+# Block dirty bitmap information.
+#
+# @count: number of dirty bytes according to the dirty bitmap
+#
+# Since: 1.3
+##
+{ 'type': 'BlockDirtyInfo',
+  'data': {'count': 'int'} }
+
+##
 # @BlockInfo:
 #
 # Block device information.  This structure describes a virtual device and
@@ -679,6 +691,9 @@
 # @tray_open: #optional True if the device has a tray and it is open
 #             (only present if removable is true)
 #
+# @dirty: #optional dirty bitmap information (only present if the dirty
+#         bitmap is enabled)
+#
 # @io-status: #optional @BlockDeviceIoStatus. Only present if the device
 #             supports it and the VM is configured to stop on errors
 #
@@ -690,7 +705,8 @@
 { 'type': 'BlockInfo',
   'data': {'device': 'str', 'type': 'str', 'removable': 'bool',
            'locked': 'bool', '*inserted': 'BlockDeviceInfo',
-           '*tray_open': 'bool', '*io-status': 'BlockDeviceIoStatus'} }
+           '*tray_open': 'bool', '*io-status': 'BlockDeviceIoStatus',
+           '*dirty': 'BlockDirtyInfo' } }
 
 ##
 # @query-block:
commit 1755da16e32c15b22a521e8a38539e4b5cf367f3
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Oct 18 16:49:18 2012 +0200

    block: introduce new dirty bitmap functionality
    
    Assert that write_compressed is never used with the dirty bitmap.
    Setting the bits early is wrong, because a coroutine might concurrently
    examine them and copy incomplete data from the source.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index e06d78f..2c3d3da 100644
--- a/block.c
+++ b/block.c
@@ -2391,7 +2391,7 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
     }
 
     if (bs->dirty_bitmap) {
-        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
+        bdrv_set_dirty(bs, sector_num, nb_sectors);
     }
 
     if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
@@ -2960,9 +2960,7 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
     if (bdrv_check_request(bs, sector_num, nb_sectors))
         return -EIO;
 
-    if (bs->dirty_bitmap) {
-        set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
-    }
+    assert(!bs->dirty_bitmap);
 
     return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
 }
@@ -4269,13 +4267,54 @@ int bdrv_get_dirty(BlockDriverState *bs, int64_t sector)
 
     if (bs->dirty_bitmap &&
         (sector << BDRV_SECTOR_BITS) < bdrv_getlength(bs)) {
-        return !!(bs->dirty_bitmap[chunk / (sizeof(unsigned long) * 8)] &
-            (1UL << (chunk % (sizeof(unsigned long) * 8))));
+        return !!(bs->dirty_bitmap[chunk / BITS_PER_LONG] &
+            (1UL << (chunk % BITS_PER_LONG)));
     } else {
         return 0;
     }
 }
 
+int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector)
+{
+    int64_t chunk;
+    int bit, elem;
+
+    /* Avoid an infinite loop.  */
+    assert(bs->dirty_count > 0);
+
+    sector = (sector | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
+    chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
+
+    QEMU_BUILD_BUG_ON(sizeof(bs->dirty_bitmap[0]) * 8 != BITS_PER_LONG);
+    elem = chunk / BITS_PER_LONG;
+    bit = chunk % BITS_PER_LONG;
+    for (;;) {
+        if (sector >= bs->total_sectors) {
+            sector = 0;
+            bit = elem = 0;
+        }
+        if (bit == 0 && bs->dirty_bitmap[elem] == 0) {
+            sector += BDRV_SECTORS_PER_DIRTY_CHUNK * BITS_PER_LONG;
+            elem++;
+        } else {
+            if (bs->dirty_bitmap[elem] & (1UL << bit)) {
+                return sector;
+            }
+            sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
+            if (++bit == BITS_PER_LONG) {
+                bit = 0;
+                elem++;
+            }
+        }
+    }
+}
+
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
+                    int nr_sectors)
+{
+    set_dirty_bitmap(bs, cur_sector, nr_sectors, 1);
+}
+
 void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
                       int nr_sectors)
 {
diff --git a/block.h b/block.h
index 096fa09..27b8f80 100644
--- a/block.h
+++ b/block.h
@@ -353,8 +353,9 @@ void *qemu_blockalign(BlockDriverState *bs, size_t size);
 
 void bdrv_set_dirty_tracking(BlockDriverState *bs, int enable);
 int bdrv_get_dirty(BlockDriverState *bs, int64_t sector);
-void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector,
-                      int nr_sectors);
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+int64_t bdrv_get_next_dirty(BlockDriverState *bs, int64_t sector);
 int64_t bdrv_get_dirty_count(BlockDriverState *bs);
 
 void bdrv_enable_copy_on_read(BlockDriverState *bs);
commit 9156df12a4f3b3db63d1b292d081d814f02e311a
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Oct 18 16:49:17 2012 +0200

    block: add bdrv_open_backing_file
    
    Mirroring runs without the backing file so that it can be copied outside
    QEMU.  However, we need to add it at the time the job is completed and
    QEMU switches to the target.  Factor out the common bits of opening an
    image and completing a mirroring operation.
    
    The new function does not assume that the file is closed immediately after
    it returns failure, so it keeps the BDRV_O_NO_BACKING flag up-to-date.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index a723be2..e06d78f 100644
--- a/block.c
+++ b/block.c
@@ -736,6 +736,42 @@ int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags)
     return 0;
 }
 
+int bdrv_open_backing_file(BlockDriverState *bs)
+{
+    char backing_filename[PATH_MAX];
+    int back_flags, ret;
+    BlockDriver *back_drv = NULL;
+
+    if (bs->backing_hd != NULL) {
+        return 0;
+    }
+
+    bs->open_flags &= ~BDRV_O_NO_BACKING;
+    if (bs->backing_file[0] == '\0') {
+        return 0;
+    }
+
+    bs->backing_hd = bdrv_new("");
+    bdrv_get_full_backing_filename(bs, backing_filename,
+                                   sizeof(backing_filename));
+
+    if (bs->backing_format[0] != '\0') {
+        back_drv = bdrv_find_format(bs->backing_format);
+    }
+
+    /* backing files always opened read-only */
+    back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT);
+
+    ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
+    if (ret < 0) {
+        bdrv_delete(bs->backing_hd);
+        bs->backing_hd = NULL;
+        bs->open_flags |= BDRV_O_NO_BACKING;
+        return ret;
+    }
+    return 0;
+}
+
 /*
  * Opens a disk image (raw, qcow2, vmdk, ...)
  */
@@ -823,24 +859,8 @@ int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
     }
 
     /* If there is a backing file, use it */
-    if ((flags & BDRV_O_NO_BACKING) == 0 && bs->backing_file[0] != '\0') {
-        char backing_filename[PATH_MAX];
-        int back_flags;
-        BlockDriver *back_drv = NULL;
-
-        bs->backing_hd = bdrv_new("");
-        bdrv_get_full_backing_filename(bs, backing_filename,
-                                       sizeof(backing_filename));
-
-        if (bs->backing_format[0] != '\0') {
-            back_drv = bdrv_find_format(bs->backing_format);
-        }
-
-        /* backing files always opened read-only */
-        back_flags =
-            flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
-
-        ret = bdrv_open(bs->backing_hd, backing_filename, back_flags, back_drv);
+    if ((flags & BDRV_O_NO_BACKING) == 0) {
+        ret = bdrv_open_backing_file(bs);
         if (ret < 0) {
             bdrv_close(bs);
             return ret;
diff --git a/block.h b/block.h
index ed4b90d..096fa09 100644
--- a/block.h
+++ b/block.h
@@ -133,6 +133,7 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
 void bdrv_delete(BlockDriverState *bs);
 int bdrv_parse_cache_flags(const char *mode, int *flags);
 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags);
+int bdrv_open_backing_file(BlockDriverState *bs);
 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
               BlockDriver *drv);
 BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
commit 9887b616619f62977682e76927a9b5a6134cc8bf
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Oct 18 16:49:16 2012 +0200

    block: add bdrv_query_stats
    
    qmp_query_blockstat cannot have errors, remove the Error argument and
    create a new public function bdrv_query_stats out of it.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index 2ba83db..a723be2 100644
--- a/block.c
+++ b/block.c
@@ -2867,8 +2867,7 @@ BlockInfoList *qmp_query_block(Error **errp)
     return head;
 }
 
-/* Consider exposing this as a full fledged QMP command */
-static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
+BlockStats *bdrv_query_stats(const BlockDriverState *bs)
 {
     BlockStats *s;
 
@@ -2892,7 +2891,7 @@ static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
 
     if (bs->file) {
         s->has_parent = true;
-        s->parent = qmp_query_blockstat(bs->file, NULL);
+        s->parent = bdrv_query_stats(bs->file);
     }
 
     return s;
@@ -2900,20 +2899,15 @@ static BlockStats *qmp_query_blockstat(const BlockDriverState *bs, Error **errp)
 
 BlockStatsList *qmp_query_blockstats(Error **errp)
 {
-    BlockStatsList *head = NULL, *cur_item = NULL;
+    BlockStatsList *head = NULL, **p_next = &head;
     BlockDriverState *bs;
 
     QTAILQ_FOREACH(bs, &bdrv_states, list) {
         BlockStatsList *info = g_malloc0(sizeof(*info));
-        info->value = qmp_query_blockstat(bs, NULL);
+        info->value = bdrv_query_stats(bs);
 
-        /* XXX: waiting for the qapi to support GSList */
-        if (!cur_item) {
-            head = cur_item = info;
-        } else {
-            cur_item->next = info;
-            cur_item = info;
-        }
+        *p_next = info;
+        p_next = &info->next;
     }
 
     return head;
diff --git a/block.h b/block.h
index 2ede16d..ed4b90d 100644
--- a/block.h
+++ b/block.h
@@ -314,6 +314,7 @@ void bdrv_get_backing_filename(BlockDriverState *bs,
 void bdrv_get_full_backing_filename(BlockDriverState *bs,
                                     char *dest, size_t sz);
 BlockInfo *bdrv_query_info(BlockDriverState *s);
+BlockStats *bdrv_query_stats(const BlockDriverState *bs);
 int bdrv_can_snapshot(BlockDriverState *bs);
 int bdrv_is_snapshot(BlockDriverState *bs);
 BlockDriverState *bdrv_snapshots(void);
commit ac84adac48963653638a1c95dd7c6ab8b4d5a4f3
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Oct 18 16:49:15 2012 +0200

    block: add bdrv_query_info
    
    Extract it out of the implementation of "info block".
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index 7e26b6f..2ba83db 100644
--- a/block.c
+++ b/block.c
@@ -2799,69 +2799,69 @@ int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
     return 0;
 }
 
-BlockInfoList *qmp_query_block(Error **errp)
+BlockInfo *bdrv_query_info(BlockDriverState *bs)
 {
-    BlockInfoList *head = NULL, *cur_item = NULL;
-    BlockDriverState *bs;
+    BlockInfo *info = g_malloc0(sizeof(*info));
+    info->device = g_strdup(bs->device_name);
+    info->type = g_strdup("unknown");
+    info->locked = bdrv_dev_is_medium_locked(bs);
+    info->removable = bdrv_dev_has_removable_media(bs);
 
-    QTAILQ_FOREACH(bs, &bdrv_states, list) {
-        BlockInfoList *info = g_malloc0(sizeof(*info));
+    if (bdrv_dev_has_removable_media(bs)) {
+        info->has_tray_open = true;
+        info->tray_open = bdrv_dev_is_tray_open(bs);
+    }
 
-        info->value = g_malloc0(sizeof(*info->value));
-        info->value->device = g_strdup(bs->device_name);
-        info->value->type = g_strdup("unknown");
-        info->value->locked = bdrv_dev_is_medium_locked(bs);
-        info->value->removable = bdrv_dev_has_removable_media(bs);
+    if (bdrv_iostatus_is_enabled(bs)) {
+        info->has_io_status = true;
+        info->io_status = bs->iostatus;
+    }
 
-        if (bdrv_dev_has_removable_media(bs)) {
-            info->value->has_tray_open = true;
-            info->value->tray_open = bdrv_dev_is_tray_open(bs);
+    if (bs->drv) {
+        info->has_inserted = true;
+        info->inserted = g_malloc0(sizeof(*info->inserted));
+        info->inserted->file = g_strdup(bs->filename);
+        info->inserted->ro = bs->read_only;
+        info->inserted->drv = g_strdup(bs->drv->format_name);
+        info->inserted->encrypted = bs->encrypted;
+        info->inserted->encryption_key_missing = bdrv_key_required(bs);
+
+        if (bs->backing_file[0]) {
+            info->inserted->has_backing_file = true;
+            info->inserted->backing_file = g_strdup(bs->backing_file);
         }
 
-        if (bdrv_iostatus_is_enabled(bs)) {
-            info->value->has_io_status = true;
-            info->value->io_status = bs->iostatus;
+        info->inserted->backing_file_depth = bdrv_get_backing_file_depth(bs);
+
+        if (bs->io_limits_enabled) {
+            info->inserted->bps =
+                           bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
+            info->inserted->bps_rd =
+                           bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
+            info->inserted->bps_wr =
+                           bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
+            info->inserted->iops =
+                           bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
+            info->inserted->iops_rd =
+                           bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
+            info->inserted->iops_wr =
+                           bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
         }
+    }
+    return info;
+}
 
-        if (bs->drv) {
-            info->value->has_inserted = true;
-            info->value->inserted = g_malloc0(sizeof(*info->value->inserted));
-            info->value->inserted->file = g_strdup(bs->filename);
-            info->value->inserted->ro = bs->read_only;
-            info->value->inserted->drv = g_strdup(bs->drv->format_name);
-            info->value->inserted->encrypted = bs->encrypted;
-            info->value->inserted->encryption_key_missing = bdrv_key_required(bs);
-            if (bs->backing_file[0]) {
-                info->value->inserted->has_backing_file = true;
-                info->value->inserted->backing_file = g_strdup(bs->backing_file);
-            }
+BlockInfoList *qmp_query_block(Error **errp)
+{
+    BlockInfoList *head = NULL, **p_next = &head;
+    BlockDriverState *bs;
 
-            info->value->inserted->backing_file_depth =
-                bdrv_get_backing_file_depth(bs);
-
-            if (bs->io_limits_enabled) {
-                info->value->inserted->bps =
-                               bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
-                info->value->inserted->bps_rd =
-                               bs->io_limits.bps[BLOCK_IO_LIMIT_READ];
-                info->value->inserted->bps_wr =
-                               bs->io_limits.bps[BLOCK_IO_LIMIT_WRITE];
-                info->value->inserted->iops =
-                               bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
-                info->value->inserted->iops_rd =
-                               bs->io_limits.iops[BLOCK_IO_LIMIT_READ];
-                info->value->inserted->iops_wr =
-                               bs->io_limits.iops[BLOCK_IO_LIMIT_WRITE];
-            }
-        }
+    QTAILQ_FOREACH(bs, &bdrv_states, list) {
+        BlockInfoList *info = g_malloc0(sizeof(*info));
+        info->value = bdrv_query_info(bs);
 
-        /* XXX: waiting for the qapi to support GSList */
-        if (!cur_item) {
-            head = cur_item = info;
-        } else {
-            cur_item->next = info;
-            cur_item = info;
-        }
+        *p_next = info;
+        p_next = &info->next;
     }
 
     return head;
diff --git a/block.h b/block.h
index e2d89d7..2ede16d 100644
--- a/block.h
+++ b/block.h
@@ -313,6 +313,7 @@ void bdrv_get_backing_filename(BlockDriverState *bs,
                                char *filename, int filename_size);
 void bdrv_get_full_backing_filename(BlockDriverState *bs,
                                     char *dest, size_t sz);
+BlockInfo *bdrv_query_info(BlockDriverState *s);
 int bdrv_can_snapshot(BlockDriverState *bs);
 int bdrv_is_snapshot(BlockDriverState *bs);
 BlockDriverState *bdrv_snapshots(void);
commit 587ed6be0b6331b11169da8846b8442840d5428c
Author: Corey Bryant <coreyb at linux.vnet.ibm.com>
Date:   Thu Oct 18 15:19:34 2012 -0400

    qemu-config: Add new -add-fd command line option
    
    This option can be used for passing file descriptors on the
    command line.  It mirrors the existing add-fd QMP command which
    allows an fd to be passed to QEMU via SCM_RIGHTS and added to an
    fd set.
    
    This can be combined with commands such as -drive to link file
    descriptors in an fd set to a drive:
    
        qemu-kvm -add-fd fd=3,set=2,opaque="rdwr:/path/to/file"
                 -add-fd fd=4,set=2,opaque="rdonly:/path/to/file"
                 -drive file=/dev/fdset/2,index=0,media=disk
    
    This example adds dups of fds 3 and 4, and the accompanying opaque
    strings to the fd set with ID=2.  qemu_open() already knows how
    to handle a filename of this format.  qemu_open() searches the
    corresponding fd set for an fd and when it finds a match, QEMU
    goes on to use a dup of that fd just like it would have used an
    fd that it opened itself.
    
    Signed-off-by: Corey Bryant <coreyb at linux.vnet.ibm.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/qemu-config.c b/qemu-config.c
index cd1ec21..601237d 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -653,6 +653,27 @@ QemuOptsList qemu_boot_opts = {
     },
 };
 
+static QemuOptsList qemu_add_fd_opts = {
+    .name = "add-fd",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_add_fd_opts.head),
+    .desc = {
+        {
+            .name = "fd",
+            .type = QEMU_OPT_NUMBER,
+            .help = "file descriptor of which a duplicate is added to fd set",
+        },{
+            .name = "set",
+            .type = QEMU_OPT_NUMBER,
+            .help = "ID of the fd set to add fd to",
+        },{
+            .name = "opaque",
+            .type = QEMU_OPT_STRING,
+            .help = "free-form string used to describe fd",
+        },
+        { /* end of list */ }
+    },
+};
+
 static QemuOptsList *vm_config_groups[32] = {
     &qemu_drive_opts,
     &qemu_chardev_opts,
@@ -669,6 +690,7 @@ static QemuOptsList *vm_config_groups[32] = {
     &qemu_boot_opts,
     &qemu_iscsi_opts,
     &qemu_sandbox_opts,
+    &qemu_add_fd_opts,
     NULL,
 };
 
diff --git a/qemu-options.hx b/qemu-options.hx
index 46f0539..a67a255 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -253,6 +253,14 @@ qemu-system-i386 -drive file=file,index=2,media=disk
 qemu-system-i386 -drive file=file,index=3,media=disk
 @end example
 
+You can open an image using pre-opened file descriptors from an fd set:
+ at example
+qemu-system-i386
+-add-fd fd=3,set=2,opaque="rdwr:/path/to/file"
+-add-fd fd=4,set=2,opaque="rdonly:/path/to/file"
+-drive file=/dev/fdset/2,index=0,media=disk
+ at end example
+
 You can connect a CDROM to the slave of ide0:
 @example
 qemu-system-i386 -drive file=file,if=ide,index=1,media=cdrom
@@ -285,6 +293,34 @@ qemu-system-i386 -hda a -hdb b
 @end example
 ETEXI
 
+DEF("add-fd", HAS_ARG, QEMU_OPTION_add_fd,
+    "-add-fd fd=fd,set=set[,opaque=opaque]\n"
+    "                Add 'fd' to fd 'set'\n", QEMU_ARCH_ALL)
+STEXI
+ at item -add-fd fd=@var{fd},set=@var{set}[,opaque=@var{opaque}]
+ at findex -add-fd
+
+Add a file descriptor to an fd set.  Valid options are:
+
+ at table @option
+ at item fd=@var{fd}
+This option defines the file descriptor of which a duplicate is added to fd set.
+The file descriptor cannot be stdin, stdout, or stderr.
+ at item set=@var{set}
+This option defines the ID of the fd set to add the file descriptor to.
+ at item opaque=@var{opaque}
+This option defines a free-form string that can be used to describe @var{fd}.
+ at end table
+
+You can open an image using pre-opened file descriptors from an fd set:
+ at example
+qemu-system-i386
+-add-fd fd=3,set=2,opaque="rdwr:/path/to/file"
+-add-fd fd=4,set=2,opaque="rdonly:/path/to/file"
+-drive file=/dev/fdset/2,index=0,media=disk
+ at end example
+ETEXI
+
 DEF("set", HAS_ARG, QEMU_OPTION_set,
     "-set group.id.arg=value\n"
     "                set <arg> parameter for item <id> of type <group>\n"
diff --git a/vl.c b/vl.c
index ee3c43a..b870caf 100644
--- a/vl.c
+++ b/vl.c
@@ -790,6 +790,78 @@ static int parse_sandbox(QemuOpts *opts, void *opaque)
     return 0;
 }
 
+#ifndef _WIN32
+static int parse_add_fd(QemuOpts *opts, void *opaque)
+{
+    int fd, dupfd, flags;
+    int64_t fdset_id;
+    const char *fd_opaque = NULL;
+
+    fd = qemu_opt_get_number(opts, "fd", -1);
+    fdset_id = qemu_opt_get_number(opts, "set", -1);
+    fd_opaque = qemu_opt_get(opts, "opaque");
+
+    if (fd < 0) {
+        qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                      "fd option is required and must be non-negative");
+        return -1;
+    }
+
+    if (fd <= STDERR_FILENO) {
+        qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                      "fd cannot be a standard I/O stream");
+        return -1;
+    }
+
+    /*
+     * All fds inherited across exec() necessarily have FD_CLOEXEC
+     * clear, while qemu sets FD_CLOEXEC on all other fds used internally.
+     */
+    flags = fcntl(fd, F_GETFD);
+    if (flags == -1 || (flags & FD_CLOEXEC)) {
+        qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                      "fd is not valid or already in use");
+        return -1;
+    }
+
+    if (fdset_id < 0) {
+        qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                      "set option is required and must be non-negative");
+        return -1;
+    }
+
+#ifdef F_DUPFD_CLOEXEC
+    dupfd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
+#else
+    dupfd = dup(fd);
+    if (dupfd != -1) {
+        qemu_set_cloexec(dupfd);
+    }
+#endif
+    if (dupfd == -1) {
+        qerror_report(ERROR_CLASS_GENERIC_ERROR,
+                      "Error duplicating fd: %s", strerror(errno));
+        return -1;
+    }
+
+    /* add the duplicate fd, and optionally the opaque string, to the fd set */
+    monitor_fdset_add_fd(dupfd, true, fdset_id, fd_opaque ? true : false,
+                         fd_opaque, NULL);
+
+    return 0;
+}
+
+static int cleanup_add_fd(QemuOpts *opts, void *opaque)
+{
+    int fd;
+
+    fd = qemu_opt_get_number(opts, "fd", -1);
+    close(fd);
+
+    return 0;
+}
+#endif
+
 /***********************************************************/
 /* QEMU Block devices */
 
@@ -3309,6 +3381,18 @@ int main(int argc, char **argv, char **envp)
                     exit(0);
                 }
                 break;
+            case QEMU_OPTION_add_fd:
+#ifndef _WIN32
+                opts = qemu_opts_parse(qemu_find_opts("add-fd"), optarg, 0);
+                if (!opts) {
+                    exit(0);
+                }
+#else
+                error_report("File descriptor passing is disabled on this "
+                             "platform");
+                exit(1);
+#endif
+                break;
             default:
                 os_parse_cmd_args(popt->index, optarg);
             }
@@ -3320,6 +3404,16 @@ int main(int argc, char **argv, char **envp)
         exit(1);
     }
 
+#ifndef _WIN32
+    if (qemu_opts_foreach(qemu_find_opts("add-fd"), parse_add_fd, NULL, 1)) {
+        exit(1);
+    }
+
+    if (qemu_opts_foreach(qemu_find_opts("add-fd"), cleanup_add_fd, NULL, 1)) {
+        exit(1);
+    }
+#endif
+
     if (machine == NULL) {
         fprintf(stderr, "No machine found.\n");
         exit(1);
commit ebe52b592dd5867fce7238f49b8c0416c3eedb6c
Author: Corey Bryant <coreyb at linux.vnet.ibm.com>
Date:   Thu Oct 18 15:19:33 2012 -0400

    monitor: Prevent removing fd from set during init
    
    If an fd is added to an fd set via the command line, and it is not
    referenced by another command line option (ie. -drive), then clean
    it up after QEMU initialization is complete.
    
    Signed-off-by: Corey Bryant <coreyb at linux.vnet.ibm.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/monitor.c b/monitor.c
index 6e99049..1d3dd9a 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2105,8 +2105,9 @@ static void monitor_fdset_cleanup(MonFdset *mon_fdset)
     MonFdsetFd *mon_fdset_fd_next;
 
     QLIST_FOREACH_SAFE(mon_fdset_fd, &mon_fdset->fds, next, mon_fdset_fd_next) {
-        if (mon_fdset_fd->removed ||
-                (QLIST_EMPTY(&mon_fdset->dup_fds) && mon_refcount == 0)) {
+        if ((mon_fdset_fd->removed ||
+                (QLIST_EMPTY(&mon_fdset->dup_fds) && mon_refcount == 0)) &&
+                runstate_is_running()) {
             close(mon_fdset_fd->fd);
             g_free(mon_fdset_fd->opaque);
             QLIST_REMOVE(mon_fdset_fd, next);
commit e446f70d54b4920e8ca5af509271b69eab86e37b
Author: Corey Bryant <coreyb at linux.vnet.ibm.com>
Date:   Thu Oct 18 15:19:32 2012 -0400

    monitor: Enable adding an inherited fd to an fd set
    
    qmp_add_fd() gets an fd that was received over a socket with
    SCM_RIGHTS and adds it to an fd set.  This patch adds support
    that will enable adding an fd that was inherited on the
    command line to an fd set.
    
    Note: All of the code added to monitor_fdset_add_fd(), with the
    exception of the error path for non-valid fdset-id, is code motion
    from qmp_add_fd().
    
    Signed-off-by: Corey Bryant <coreyb at linux.vnet.ibm.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/monitor.c b/monitor.c
index 3519b39..6e99049 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2135,8 +2135,6 @@ AddfdInfo *qmp_add_fd(bool has_fdset_id, int64_t fdset_id, bool has_opaque,
 {
     int fd;
     Monitor *mon = cur_mon;
-    MonFdset *mon_fdset = NULL;
-    MonFdsetFd *mon_fdset_fd;
     AddfdInfo *fdinfo;
 
     fd = qemu_chr_fe_get_msgfd(mon->chr);
@@ -2145,78 +2143,12 @@ AddfdInfo *qmp_add_fd(bool has_fdset_id, int64_t fdset_id, bool has_opaque,
         goto error;
     }
 
-    if (has_fdset_id) {
-        QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
-            /* Break if match found or match impossible due to ordering by ID */
-            if (fdset_id <= mon_fdset->id) {
-                if (fdset_id < mon_fdset->id) {
-                    mon_fdset = NULL;
-                }
-                break;
-            }
-        }
+    fdinfo = monitor_fdset_add_fd(fd, has_fdset_id, fdset_id,
+                                  has_opaque, opaque, errp);
+    if (fdinfo) {
+        return fdinfo;
     }
 
-    if (mon_fdset == NULL) {
-        int64_t fdset_id_prev = -1;
-        MonFdset *mon_fdset_cur = QLIST_FIRST(&mon_fdsets);
-
-        if (has_fdset_id) {
-            if (fdset_id < 0) {
-                error_set(errp, QERR_INVALID_PARAMETER_VALUE, "fdset-id",
-                          "a non-negative value");
-                goto error;
-            }
-            /* Use specified fdset ID */
-            QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
-                mon_fdset_cur = mon_fdset;
-                if (fdset_id < mon_fdset_cur->id) {
-                    break;
-                }
-            }
-        } else {
-            /* Use first available fdset ID */
-            QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
-                mon_fdset_cur = mon_fdset;
-                if (fdset_id_prev == mon_fdset_cur->id - 1) {
-                    fdset_id_prev = mon_fdset_cur->id;
-                    continue;
-                }
-                break;
-            }
-        }
-
-        mon_fdset = g_malloc0(sizeof(*mon_fdset));
-        if (has_fdset_id) {
-            mon_fdset->id = fdset_id;
-        } else {
-            mon_fdset->id = fdset_id_prev + 1;
-        }
-
-        /* The fdset list is ordered by fdset ID */
-        if (!mon_fdset_cur) {
-            QLIST_INSERT_HEAD(&mon_fdsets, mon_fdset, next);
-        } else if (mon_fdset->id < mon_fdset_cur->id) {
-            QLIST_INSERT_BEFORE(mon_fdset_cur, mon_fdset, next);
-        } else {
-            QLIST_INSERT_AFTER(mon_fdset_cur, mon_fdset, next);
-        }
-    }
-
-    mon_fdset_fd = g_malloc0(sizeof(*mon_fdset_fd));
-    mon_fdset_fd->fd = fd;
-    mon_fdset_fd->removed = false;
-    if (has_opaque) {
-        mon_fdset_fd->opaque = g_strdup(opaque);
-    }
-    QLIST_INSERT_HEAD(&mon_fdset->fds, mon_fdset_fd, next);
-
-    fdinfo = g_malloc0(sizeof(*fdinfo));
-    fdinfo->fdset_id = mon_fdset->id;
-    fdinfo->fd = mon_fdset_fd->fd;
-
-    return fdinfo;
-
 error:
     if (fd != -1) {
         close(fd);
@@ -2301,6 +2233,87 @@ FdsetInfoList *qmp_query_fdsets(Error **errp)
     return fdset_list;
 }
 
+AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id,
+                                bool has_opaque, const char *opaque,
+                                Error **errp)
+{
+    MonFdset *mon_fdset = NULL;
+    MonFdsetFd *mon_fdset_fd;
+    AddfdInfo *fdinfo;
+
+    if (has_fdset_id) {
+        QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+            /* Break if match found or match impossible due to ordering by ID */
+            if (fdset_id <= mon_fdset->id) {
+                if (fdset_id < mon_fdset->id) {
+                    mon_fdset = NULL;
+                }
+                break;
+            }
+        }
+    }
+
+    if (mon_fdset == NULL) {
+        int64_t fdset_id_prev = -1;
+        MonFdset *mon_fdset_cur = QLIST_FIRST(&mon_fdsets);
+
+        if (has_fdset_id) {
+            if (fdset_id < 0) {
+                error_set(errp, QERR_INVALID_PARAMETER_VALUE, "fdset-id",
+                          "a non-negative value");
+                return NULL;
+            }
+            /* Use specified fdset ID */
+            QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+                mon_fdset_cur = mon_fdset;
+                if (fdset_id < mon_fdset_cur->id) {
+                    break;
+                }
+            }
+        } else {
+            /* Use first available fdset ID */
+            QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+                mon_fdset_cur = mon_fdset;
+                if (fdset_id_prev == mon_fdset_cur->id - 1) {
+                    fdset_id_prev = mon_fdset_cur->id;
+                    continue;
+                }
+                break;
+            }
+        }
+
+        mon_fdset = g_malloc0(sizeof(*mon_fdset));
+        if (has_fdset_id) {
+            mon_fdset->id = fdset_id;
+        } else {
+            mon_fdset->id = fdset_id_prev + 1;
+        }
+
+        /* The fdset list is ordered by fdset ID */
+        if (!mon_fdset_cur) {
+            QLIST_INSERT_HEAD(&mon_fdsets, mon_fdset, next);
+        } else if (mon_fdset->id < mon_fdset_cur->id) {
+            QLIST_INSERT_BEFORE(mon_fdset_cur, mon_fdset, next);
+        } else {
+            QLIST_INSERT_AFTER(mon_fdset_cur, mon_fdset, next);
+        }
+    }
+
+    mon_fdset_fd = g_malloc0(sizeof(*mon_fdset_fd));
+    mon_fdset_fd->fd = fd;
+    mon_fdset_fd->removed = false;
+    if (has_opaque) {
+        mon_fdset_fd->opaque = g_strdup(opaque);
+    }
+    QLIST_INSERT_HEAD(&mon_fdset->fds, mon_fdset_fd, next);
+
+    fdinfo = g_malloc0(sizeof(*fdinfo));
+    fdinfo->fdset_id = mon_fdset->id;
+    fdinfo->fd = mon_fdset_fd->fd;
+
+    return fdinfo;
+}
+
 int monitor_fdset_get_fd(int64_t fdset_id, int flags)
 {
 #ifndef _WIN32
diff --git a/monitor.h b/monitor.h
index b6e7d95..d4c017e 100644
--- a/monitor.h
+++ b/monitor.h
@@ -90,6 +90,9 @@ int qmp_qom_set(Monitor *mon, const QDict *qdict, QObject **ret);
 
 int qmp_qom_get(Monitor *mon, const QDict *qdict, QObject **ret);
 
+AddfdInfo *monitor_fdset_add_fd(int fd, bool has_fdset_id, int64_t fdset_id,
+                                bool has_opaque, const char *opaque,
+                                Error **errp);
 int monitor_fdset_get_fd(int64_t fdset_id, int flags);
 int monitor_fdset_dup_fd_add(int64_t fdset_id, int dup_fd);
 int monitor_fdset_dup_fd_remove(int dup_fd);
commit 9ac54af0c35d3f931653efae5698ef0f465eac7c
Author: Corey Bryant <coreyb at linux.vnet.ibm.com>
Date:   Thu Oct 18 15:19:31 2012 -0400

    monitor: Allow add-fd to any specified fd set
    
    The first call to add an fd to an fd set was previously not
    allowed to choose the fd set ID.  The ID was generated as
    the first available and ensuing calls could add more fds by
    specifying the fd set ID.  This change allows users to
    choose the fd set ID on the first call.
    
    Signed-off-by: Corey Bryant <coreyb at linux.vnet.ibm.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/monitor.c b/monitor.c
index d17ae2d..3519b39 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2135,7 +2135,7 @@ AddfdInfo *qmp_add_fd(bool has_fdset_id, int64_t fdset_id, bool has_opaque,
 {
     int fd;
     Monitor *mon = cur_mon;
-    MonFdset *mon_fdset;
+    MonFdset *mon_fdset = NULL;
     MonFdsetFd *mon_fdset_fd;
     AddfdInfo *fdinfo;
 
@@ -2147,34 +2147,54 @@ AddfdInfo *qmp_add_fd(bool has_fdset_id, int64_t fdset_id, bool has_opaque,
 
     if (has_fdset_id) {
         QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
-            if (mon_fdset->id == fdset_id) {
+            /* Break if match found or match impossible due to ordering by ID */
+            if (fdset_id <= mon_fdset->id) {
+                if (fdset_id < mon_fdset->id) {
+                    mon_fdset = NULL;
+                }
                 break;
             }
         }
-        if (mon_fdset == NULL) {
-            error_set(errp, QERR_INVALID_PARAMETER_VALUE, "fdset-id",
-                      "an existing fdset-id");
-            goto error;
-        }
-    } else {
+    }
+
+    if (mon_fdset == NULL) {
         int64_t fdset_id_prev = -1;
         MonFdset *mon_fdset_cur = QLIST_FIRST(&mon_fdsets);
 
-        /* Use first available fdset ID */
-        QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
-            mon_fdset_cur = mon_fdset;
-            if (fdset_id_prev == mon_fdset_cur->id - 1) {
-                fdset_id_prev = mon_fdset_cur->id;
-                continue;
+        if (has_fdset_id) {
+            if (fdset_id < 0) {
+                error_set(errp, QERR_INVALID_PARAMETER_VALUE, "fdset-id",
+                          "a non-negative value");
+                goto error;
+            }
+            /* Use specified fdset ID */
+            QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+                mon_fdset_cur = mon_fdset;
+                if (fdset_id < mon_fdset_cur->id) {
+                    break;
+                }
+            }
+        } else {
+            /* Use first available fdset ID */
+            QLIST_FOREACH(mon_fdset, &mon_fdsets, next) {
+                mon_fdset_cur = mon_fdset;
+                if (fdset_id_prev == mon_fdset_cur->id - 1) {
+                    fdset_id_prev = mon_fdset_cur->id;
+                    continue;
+                }
+                break;
             }
-            break;
         }
 
         mon_fdset = g_malloc0(sizeof(*mon_fdset));
-        mon_fdset->id = fdset_id_prev + 1;
+        if (has_fdset_id) {
+            mon_fdset->id = fdset_id;
+        } else {
+            mon_fdset->id = fdset_id_prev + 1;
+        }
 
         /* The fdset list is ordered by fdset ID */
-        if (mon_fdset->id == 0) {
+        if (!mon_fdset_cur) {
             QLIST_INSERT_HEAD(&mon_fdsets, mon_fdset, next);
         } else if (mon_fdset->id < mon_fdset_cur->id) {
             QLIST_INSERT_BEFORE(mon_fdset_cur, mon_fdset, next);
diff --git a/qapi-schema.json b/qapi-schema.json
index c615ee2..36278cc 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2625,7 +2625,7 @@
 #
 # Returns: @AddfdInfo on success
 #          If file descriptor was not received, FdNotSupplied
-#          If @fdset-id does not exist, InvalidParameterValue
+#          If @fdset-id is a negative value, InvalidParameterValue
 #
 # Notes: The list of fd sets is shared by all monitor connections.
 #
commit 80168bff43760bde98388480dc7c93f94693421c
Author: Luiz Capitulino <lcapitulino at redhat.com>
Date:   Wed Oct 17 16:45:25 2012 -0300

    block: bdrv_create(): don't leak cco.filename on error
    
    Signed-off-by: Luiz Capitulino <lcapitulino at redhat.com>
    Reviewed-by: Stefan Hajnoczi <stefanha at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index 5e7fc9e..7e26b6f 100644
--- a/block.c
+++ b/block.c
@@ -379,7 +379,8 @@ int bdrv_create(BlockDriver *drv, const char* filename,
     };
 
     if (!drv->bdrv_create) {
-        return -ENOTSUP;
+        ret = -ENOTSUP;
+        goto out;
     }
 
     if (qemu_in_coroutine()) {
@@ -394,8 +395,9 @@ int bdrv_create(BlockDriver *drv, const char* filename,
     }
 
     ret = cco.ret;
-    g_free(cco.filename);
 
+out:
+    g_free(cco.filename);
     return ret;
 }
 
commit e53575606aa5567bde3246cdc3af1fdc757f77c8
Author: Kashyap Chamarthy <kashyap.cv at gmail.com>
Date:   Thu Oct 18 11:25:34 2012 +0530

    qemu-img: document 'info --backing-chain'
    
    Signed-off-by: Kashyap Chamarthy <kashyap.cv at gmail.com>
    Reviewed-by: Stefan Hajnoczi <stefanha at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 0ef82e9..a181363 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -34,9 +34,9 @@ STEXI
 ETEXI
 
 DEF("info", img_info,
-    "info [-f fmt] [--output=ofmt] filename")
+    "info [-f fmt] [--output=ofmt] [--backing-chain] filename")
 STEXI
- at item info [-f @var{fmt}] [--output=@var{ofmt}] @var{filename}
+ at item info [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename}
 ETEXI
 
 DEF("snapshot", img_snapshot,
diff --git a/qemu-img.texi b/qemu-img.texi
index 42ec392..60b83fc 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -28,6 +28,10 @@ Command parameters:
 is the disk image format. It is guessed automatically in most cases. See below
 for a description of the supported disk formats.
 
+ at item --backing-chain
+will enumerate information about backing files in a disk image chain. Refer
+below for further description.
+
 @item size
 is the disk image size in bytes. Optional suffixes @code{k} or @code{K}
 (kilobyte, 1024) @code{M} (megabyte, 1024k) and @code{G} (gigabyte, 1024M)
@@ -129,7 +133,7 @@ created as a copy on write image of the specified base image; the
 @var{backing_file} should have the same content as the input's base image,
 however the path, image format, etc may differ.
 
- at item info [-f @var{fmt}] [--output=@var{ofmt}] @var{filename}
+ at item info [-f @var{fmt}] [--output=@var{ofmt}] [--backing-chain] @var{filename}
 
 Give information about the disk image @var{filename}. Use it in
 particular to know the size reserved on disk which can be different
@@ -137,6 +141,21 @@ from the displayed size. If VM snapshots are stored in the disk image,
 they are displayed too. The command can output in the format @var{ofmt}
 which is either @code{human} or @code{json}.
 
+If a disk image has a backing file chain, information about each disk image in
+the chain can be recursively enumerated by using the option @code{--backing-chain}.
+
+For instance, if you have an image chain like:
+
+ at example
+base.qcow2 <- snap1.qcow2 <- snap2.qcow2
+ at end example
+
+To enumerate information about each disk image in the above chain, starting from top to base, do:
+
+ at example
+qemu-img info --backing-chain snap2.qcow2
+ at end example
+
 @item snapshot [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot} ] @var{filename}
 
 List, apply, create or delete snapshots in image @var{filename}.
commit 514d9da5a9a820b43a2cb90b439dd570a7835114
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Wed Oct 17 14:02:32 2012 +0200

    qemu-iotests: Add 043 backing file chain infinite loop test
    
    This new test verifies that qemu-img info --backing-chain safely aborts
    when an image file has a backing file infinite loop.
    
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/tests/qemu-iotests/043 b/tests/qemu-iotests/043
new file mode 100755
index 0000000..3ba08dc
--- /dev/null
+++ b/tests/qemu-iotests/043
@@ -0,0 +1,95 @@
+#!/bin/bash
+#
+# Test that qemu-img info --backing-chain detects infinite loops
+#
+# Copyright (C) 2012 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=stefanha at redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+    _cleanup_test_img
+    rm -f $TEST_IMG.[123].base
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+# Any format supporting backing files
+_supported_fmt qcow qcow2 vmdk qed
+_supported_proto generic
+_supported_os Linux
+
+
+size=128M
+_make_test_img $size
+$QEMU_IMG rebase -u -b $TEST_IMG $TEST_IMG
+
+echo
+echo "== backing file references self =="
+_img_info --backing-chain
+
+_make_test_img $size
+mv $TEST_IMG $TEST_IMG.base
+_make_test_img -b $TEST_IMG.base $size
+$QEMU_IMG rebase -u -b $TEST_IMG $TEST_IMG.base
+
+echo
+echo "== parent references self =="
+_img_info --backing-chain
+
+_make_test_img $size
+mv $TEST_IMG $TEST_IMG.1.base
+_make_test_img -b $TEST_IMG.1.base $size
+mv $TEST_IMG $TEST_IMG.2.base
+_make_test_img -b $TEST_IMG.2.base $size
+mv $TEST_IMG $TEST_IMG.3.base
+_make_test_img -b $TEST_IMG.3.base $size
+$QEMU_IMG rebase -u -b $TEST_IMG.2.base $TEST_IMG.1.base
+
+echo
+echo "== ancestor references another ancestor =="
+_img_info --backing-chain
+
+_make_test_img $size
+mv $TEST_IMG $TEST_IMG.1.base
+_make_test_img -b $TEST_IMG.1.base $size
+mv $TEST_IMG $TEST_IMG.2.base
+_make_test_img -b $TEST_IMG.2.base $size
+
+echo
+echo "== finite chain of length 3 (human) =="
+_img_info --backing-chain
+
+echo
+echo "== finite chain of length 3 (json) =="
+_img_info --backing-chain --output=json
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
diff --git a/tests/qemu-iotests/043.out b/tests/qemu-iotests/043.out
new file mode 100644
index 0000000..ad23337
--- /dev/null
+++ b/tests/qemu-iotests/043.out
@@ -0,0 +1,66 @@
+QA output created by 043
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
+
+== backing file references self ==
+qemu-img: Backing file 'TEST_DIR/t.IMGFMT' creates an infinite loop.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file='TEST_DIR/t.IMGFMT.base' 
+
+== parent references self ==
+qemu-img: Backing file 'TEST_DIR/t.IMGFMT' creates an infinite loop.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file='TEST_DIR/t.IMGFMT.1.base' 
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file='TEST_DIR/t.IMGFMT.2.base' 
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file='TEST_DIR/t.IMGFMT.3.base' 
+
+== ancestor references another ancestor ==
+qemu-img: Backing file 'TEST_DIR/t.IMGFMT.2.base' creates an infinite loop.
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file='TEST_DIR/t.IMGFMT.1.base' 
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 backing_file='TEST_DIR/t.IMGFMT.2.base' 
+
+== finite chain of length 3 (human) ==
+image: TEST_DIR/t.IMGFMT
+file format: IMGFMT
+virtual size: 128M (134217728 bytes)
+cluster_size: 65536
+backing file: TEST_DIR/t.IMGFMT.2.base
+
+image: TEST_DIR/t.IMGFMT.2.base
+file format: IMGFMT
+virtual size: 128M (134217728 bytes)
+cluster_size: 65536
+backing file: TEST_DIR/t.IMGFMT.1.base
+
+image: TEST_DIR/t.IMGFMT.1.base
+file format: IMGFMT
+virtual size: 128M (134217728 bytes)
+cluster_size: 65536
+
+== finite chain of length 3 (json) ==
+[
+    {
+        "virtual-size": 134217728, 
+        "filename": "TEST_DIR/t.IMGFMT", 
+        "cluster-size": 65536, 
+        "format": "IMGFMT", 
+        "backing-filename": "TEST_DIR/t.IMGFMT.2.base", 
+        "dirty-flag": false
+    }, 
+    {
+        "virtual-size": 134217728, 
+        "filename": "TEST_DIR/t.IMGFMT.2.base", 
+        "cluster-size": 65536, 
+        "format": "IMGFMT", 
+        "backing-filename": "TEST_DIR/t.IMGFMT.1.base", 
+        "dirty-flag": false
+    }, 
+    {
+        "virtual-size": 134217728, 
+        "filename": "TEST_DIR/t.IMGFMT.1.base", 
+        "cluster-size": 65536, 
+        "format": "IMGFMT", 
+        "dirty-flag": false
+    }
+]
+*** done
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
index d534e94..334534f 100644
--- a/tests/qemu-iotests/common.rc
+++ b/tests/qemu-iotests/common.rc
@@ -145,6 +145,16 @@ _check_test_img()
     	sed -e 's/qemu-img\: This image format does not support checks/No errors were found on the image./'
 }
 
+_img_info()
+{
+    $QEMU_IMG info "$@" $TEST_IMG 2>&1 | \
+        sed -e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \
+            -e "s#$TEST_DIR#TEST_DIR#g" \
+            -e "s#$IMGFMT#IMGFMT#g" \
+            -e "/^disk size:/ D" \
+            -e "/actual-size/ D"
+}
+
 _get_pids_by_name()
 {
     if [ $# -ne 1 ]
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index d5f9ab0..7407492 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -48,3 +48,4 @@
 039 rw auto
 040 rw auto
 042 rw auto quick
+043 rw auto backing
commit 9699bf0d06eb42625779216586a28d19d8fc005d
Author: Stefan Hajnoczi <stefanha at redhat.com>
Date:   Wed Oct 17 14:02:31 2012 +0200

    qemu-img: Add --backing-chain option to info command
    
    The qemu-img info --backing-chain option enumerates the backing file
    chain.  For example, for base.qcow2 <- snap1.qcow2 <- snap2.qcow2 the
    output becomes:
    
      $ qemu-img info --backing-chain snap2.qcow2
      image: snap2.qcow2
      file format: qcow2
      virtual size: 100M (104857600 bytes)
      disk size: 196K
      cluster_size: 65536
      backing file: snap1.qcow2
      backing file format: qcow2
    
      image: snap1.qcow2
      file format: qcow2
      virtual size: 100M (104857600 bytes)
      disk size: 196K
      cluster_size: 65536
      backing file: base.qcow2
      backing file format: qcow2
    
      image: base.qcow2
      file format: qcow2
      virtual size: 100M (104857600 bytes)
      disk size: 136K
      cluster_size: 65536
    
    Signed-off-by: Stefan Hajnoczi <stefanha at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/qemu-img.c b/qemu-img.c
index c092ccf..b17bddd 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1112,6 +1112,23 @@ static void dump_snapshots(BlockDriverState *bs)
     g_free(sn_tab);
 }
 
+static void dump_json_image_info_list(ImageInfoList *list)
+{
+    Error *errp = NULL;
+    QString *str;
+    QmpOutputVisitor *ov = qmp_output_visitor_new();
+    QObject *obj;
+    visit_type_ImageInfoList(qmp_output_get_visitor(ov),
+                             &list, NULL, &errp);
+    obj = qmp_output_get_qobject(ov);
+    str = qobject_to_json_pretty(obj);
+    assert(str != NULL);
+    printf("%s\n", qstring_get_str(str));
+    qobject_decref(obj);
+    qmp_output_visitor_cleanup(ov);
+    QDECREF(str);
+}
+
 static void collect_snapshots(BlockDriverState *bs , ImageInfo *info)
 {
     int i, sn_count;
@@ -1251,9 +1268,129 @@ static void dump_human_image_info(ImageInfo *info)
             printf("backing file format: %s\n", info->backing_filename_format);
         }
     }
+
+    if (info->has_snapshots) {
+        SnapshotInfoList *elem;
+        char buf[256];
+
+        printf("Snapshot list:\n");
+        printf("%s\n", bdrv_snapshot_dump(buf, sizeof(buf), NULL));
+
+        /* Ideally bdrv_snapshot_dump() would operate on SnapshotInfoList but
+         * we convert to the block layer's native QEMUSnapshotInfo for now.
+         */
+        for (elem = info->snapshots; elem; elem = elem->next) {
+            QEMUSnapshotInfo sn = {
+                .vm_state_size = elem->value->vm_state_size,
+                .date_sec = elem->value->date_sec,
+                .date_nsec = elem->value->date_nsec,
+                .vm_clock_nsec = elem->value->vm_clock_sec * 1000000000ULL +
+                                 elem->value->vm_clock_nsec,
+            };
+
+            pstrcpy(sn.id_str, sizeof(sn.id_str), elem->value->id);
+            pstrcpy(sn.name, sizeof(sn.name), elem->value->name);
+            printf("%s\n", bdrv_snapshot_dump(buf, sizeof(buf), &sn));
+        }
+    }
+}
+
+static void dump_human_image_info_list(ImageInfoList *list)
+{
+    ImageInfoList *elem;
+    bool delim = false;
+
+    for (elem = list; elem; elem = elem->next) {
+        if (delim) {
+            printf("\n");
+        }
+        delim = true;
+
+        dump_human_image_info(elem->value);
+    }
+}
+
+static gboolean str_equal_func(gconstpointer a, gconstpointer b)
+{
+    return strcmp(a, b) == 0;
+}
+
+/**
+ * Open an image file chain and return an ImageInfoList
+ *
+ * @filename: topmost image filename
+ * @fmt: topmost image format (may be NULL to autodetect)
+ * @chain: true  - enumerate entire backing file chain
+ *         false - only topmost image file
+ *
+ * Returns a list of ImageInfo objects or NULL if there was an error opening an
+ * image file.  If there was an error a message will have been printed to
+ * stderr.
+ */
+static ImageInfoList *collect_image_info_list(const char *filename,
+                                              const char *fmt,
+                                              bool chain)
+{
+    ImageInfoList *head = NULL;
+    ImageInfoList **last = &head;
+    GHashTable *filenames;
+
+    filenames = g_hash_table_new_full(g_str_hash, str_equal_func, NULL, NULL);
+
+    while (filename) {
+        BlockDriverState *bs;
+        ImageInfo *info;
+        ImageInfoList *elem;
+
+        if (g_hash_table_lookup_extended(filenames, filename, NULL, NULL)) {
+            error_report("Backing file '%s' creates an infinite loop.",
+                         filename);
+            goto err;
+        }
+        g_hash_table_insert(filenames, (gpointer)filename, NULL);
+
+        bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_NO_BACKING,
+                           false);
+        if (!bs) {
+            goto err;
+        }
+
+        info = g_new0(ImageInfo, 1);
+        collect_image_info(bs, info, filename, fmt);
+        collect_snapshots(bs, info);
+
+        elem = g_new0(ImageInfoList, 1);
+        elem->value = info;
+        *last = elem;
+        last = &elem->next;
+
+        bdrv_delete(bs);
+
+        filename = fmt = NULL;
+        if (chain) {
+            if (info->has_full_backing_filename) {
+                filename = info->full_backing_filename;
+            } else if (info->has_backing_filename) {
+                filename = info->backing_filename;
+            }
+            if (info->has_backing_filename_format) {
+                fmt = info->backing_filename_format;
+            }
+        }
+    }
+    g_hash_table_destroy(filenames);
+    return head;
+
+err:
+    qapi_free_ImageInfoList(head);
+    g_hash_table_destroy(filenames);
+    return NULL;
 }
 
-enum {OPTION_OUTPUT = 256};
+enum {
+    OPTION_OUTPUT = 256,
+    OPTION_BACKING_CHAIN = 257,
+};
 
 typedef enum OutputFormat {
     OFORMAT_JSON,
@@ -1264,9 +1401,9 @@ static int img_info(int argc, char **argv)
 {
     int c;
     OutputFormat output_format = OFORMAT_HUMAN;
+    bool chain = false;
     const char *filename, *fmt, *output;
-    BlockDriverState *bs;
-    ImageInfo *info;
+    ImageInfoList *list;
 
     fmt = NULL;
     output = NULL;
@@ -1276,6 +1413,7 @@ static int img_info(int argc, char **argv)
             {"help", no_argument, 0, 'h'},
             {"format", required_argument, 0, 'f'},
             {"output", required_argument, 0, OPTION_OUTPUT},
+            {"backing-chain", no_argument, 0, OPTION_BACKING_CHAIN},
             {0, 0, 0, 0}
         };
         c = getopt_long(argc, argv, "f:h",
@@ -1294,6 +1432,9 @@ static int img_info(int argc, char **argv)
         case OPTION_OUTPUT:
             output = optarg;
             break;
+        case OPTION_BACKING_CHAIN:
+            chain = true;
+            break;
         }
     }
     if (optind >= argc) {
@@ -1310,27 +1451,25 @@ static int img_info(int argc, char **argv)
         return 1;
     }
 
-    bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_NO_BACKING, false);
-    if (!bs) {
+    list = collect_image_info_list(filename, fmt, chain);
+    if (!list) {
         return 1;
     }
 
-    info = g_new0(ImageInfo, 1);
-    collect_image_info(bs, info, filename, fmt);
-
     switch (output_format) {
     case OFORMAT_HUMAN:
-        dump_human_image_info(info);
-        dump_snapshots(bs);
+        dump_human_image_info_list(list);
         break;
     case OFORMAT_JSON:
-        collect_snapshots(bs, info);
-        dump_json_image_info(info);
+        if (chain) {
+            dump_json_image_info_list(list);
+        } else {
+            dump_json_image_info(list->value);
+        }
         break;
     }
 
-    qapi_free_ImageInfo(info);
-    bdrv_delete(bs);
+    qapi_free_ImageInfoList(list);
     return 0;
 }
 
commit 6bf0d1f478a5a425c0c024994375592805d591c0
Author: Jeff Cody <jcody at redhat.com>
Date:   Tue Oct 16 15:49:12 2012 -0400

    qemu-iotests: add relative backing file tests for block-commit (040)
    
    The previous block commit used absolute filenames for all block-commit
    images and commands; this adds relative filenames for the same tests.
    
    Signed-off-by: Jeff Cody <jcody at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
index 0fa6441..aad535a 100755
--- a/tests/qemu-iotests/040
+++ b/tests/qemu-iotests/040
@@ -26,6 +26,7 @@ import os
 import iotests
 from iotests import qemu_img, qemu_io
 import struct
+import errno
 
 backing_img = os.path.join(iotests.test_dir, 'backing.img')
 mid_img = os.path.join(iotests.test_dir, 'mid.img')
@@ -143,6 +144,107 @@ class TestSingleDrive(ImageCommitTestCase):
         self.assert_qmp(result, 'error/class', 'GenericError')
         self.assert_qmp(result, 'error/desc', "Parameter 'top' is missing")
 
+class TestRelativePaths(ImageCommitTestCase):
+    image_len = 1 * 1024 * 1024
+    test_len = 1 * 1024 * 256
+
+    dir1 = "dir1"
+    dir2 = "dir2/"
+    dir3 = "dir2/dir3/"
+
+    test_img = os.path.join(iotests.test_dir, dir3, 'test.img')
+    mid_img = "../mid.img"
+    backing_img = "../dir1/backing.img"
+
+    backing_img_abs = os.path.join(iotests.test_dir, dir1, 'backing.img')
+    mid_img_abs = os.path.join(iotests.test_dir, dir2, 'mid.img')
+
+    def setUp(self):
+        try:
+            os.mkdir(os.path.join(iotests.test_dir, self.dir1))
+            os.mkdir(os.path.join(iotests.test_dir, self.dir2))
+            os.mkdir(os.path.join(iotests.test_dir, self.dir3))
+        except OSError as exception:
+            if exception.errno != errno.EEXIST:
+                raise
+        self.create_image(self.backing_img_abs, TestRelativePaths.image_len)
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % self.backing_img_abs, self.mid_img_abs)
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % self.mid_img_abs, self.test_img)
+        qemu_img('rebase', '-u', '-b', self.backing_img, self.mid_img_abs)
+        qemu_img('rebase', '-u', '-b', self.mid_img, self.test_img)
+        qemu_io('-c', 'write -P 0xab 0 524288', self.backing_img_abs)
+        qemu_io('-c', 'write -P 0xef 524288 524288', self.mid_img_abs)
+        self.vm = iotests.VM().add_drive(self.test_img)
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(self.test_img)
+        os.remove(self.mid_img_abs)
+        os.remove(self.backing_img_abs)
+        try:
+            os.rmdir(os.path.join(iotests.test_dir, self.dir1))
+            os.rmdir(os.path.join(iotests.test_dir, self.dir3))
+            os.rmdir(os.path.join(iotests.test_dir, self.dir2))
+        except OSError as exception:
+            if exception.errno != errno.EEXIST and exception.errno != errno.ENOTEMPTY:
+                raise
+
+    def test_commit(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % self.mid_img)
+        self.assert_qmp(result, 'return', {})
+
+        completed = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assert_qmp(event, 'data/type', 'commit')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/offset', self.image_len)
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_commit()
+        self.vm.shutdown()
+
+        self.assertEqual(-1, qemu_io('-c', 'read -P 0xab 0 524288', self.backing_img_abs).find("verification failed"))
+        self.assertEqual(-1, qemu_io('-c', 'read -P 0xef 524288 524288', self.backing_img_abs).find("verification failed"))
+
+    def test_device_not_found(self):
+        result = self.vm.qmp('block-commit', device='nonexistent', top='%s' % self.mid_img)
+        self.assert_qmp(result, 'error/class', 'DeviceNotFound')
+
+    def test_top_same_base(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % self.mid_img, base='%s' % self.mid_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Base \'%s\' not found' % self.mid_img)
+
+    def test_top_invalid(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='badfile', base='%s' % self.backing_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Top image file badfile not found')
+
+    def test_base_invalid(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % self.mid_img, base='badfile')
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Base \'badfile\' not found')
+
+    def test_top_is_active(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % self.test_img, base='%s' % self.backing_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Top image as the active layer is currently unsupported')
+
+    def test_top_and_base_reversed(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % self.backing_img, base='%s' % self.mid_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Base \'%s\' not found' % self.mid_img)
+
 
 class TestSetSpeed(ImageCommitTestCase):
     image_len = 80 * 1024 * 1024 # MB
diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out
index dae404e..b6f2576 100644
--- a/tests/qemu-iotests/040.out
+++ b/tests/qemu-iotests/040.out
@@ -1,5 +1,5 @@
-.........
+................
 ----------------------------------------------------------------------
-Ran 9 tests
+Ran 16 tests
 
 OK
commit d5208c45be38ab858db6ec5a5097aa1c1a8ebbc9
Author: Jeff Cody <jcody at redhat.com>
Date:   Tue Oct 16 15:49:10 2012 -0400

    block: in commit, determine base image from the top image
    
    This simplifies some code and error checking, and also fixes a bug.
    
    bdrv_find_backing_image() should only be passed absolute filenames,
    or filenames relative to the chain.  In the QMP message handler for
    block commit, when looking up the base do so from the determined top
    image, so we know it is reachable from top.
    
    Some of the error messages put out by block-commit have changed
    slightly, which causes 2 tests cases for block-commit to fail.
    This patch updates the test cases to look for the correct error
    output.
    
    Signed-off-by: Jeff Cody <jcody at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block/commit.c b/block/commit.c
index 733c914..13d9e82 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -211,15 +211,6 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
         return;
     }
 
-    /* top and base may be valid, but let's make sure that base is reachable
-     * from top */
-    if (bdrv_find_backing_image(top, base->filename) != base) {
-        error_setg(errp,
-                   "Base (%s) is not reachable from top (%s)",
-                   base->filename, top->filename);
-        return;
-    }
-
     overlay_bs = bdrv_find_overlay(bs, top);
 
     if (overlay_bs == NULL) {
diff --git a/blockdev.c b/blockdev.c
index 99828ad..46e4bbd 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -1157,16 +1157,6 @@ void qmp_block_commit(const char *device,
         error_set(errp, QERR_DEVICE_NOT_FOUND, device);
         return;
     }
-    if (base && has_base) {
-        base_bs = bdrv_find_backing_image(bs, base);
-    } else {
-        base_bs = bdrv_find_base(bs);
-    }
-
-    if (base_bs == NULL) {
-        error_set(errp, QERR_BASE_NOT_FOUND, base ? base : "NULL");
-        return;
-    }
 
     /* default top_bs is the active layer */
     top_bs = bs;
@@ -1182,6 +1172,17 @@ void qmp_block_commit(const char *device,
         return;
     }
 
+    if (has_base && base) {
+        base_bs = bdrv_find_backing_image(top_bs, base);
+    } else {
+        base_bs = bdrv_find_base(top_bs);
+    }
+
+    if (base_bs == NULL) {
+        error_set(errp, QERR_BASE_NOT_FOUND, base ? base : "NULL");
+        return;
+    }
+
     commit_start(bs, base_bs, top_bs, speed, on_error, block_job_cb, bs,
                 &local_err);
     if (local_err != NULL) {
diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
index 258e7ea..0fa6441 100755
--- a/tests/qemu-iotests/040
+++ b/tests/qemu-iotests/040
@@ -111,7 +111,7 @@ class TestSingleDrive(ImageCommitTestCase):
         self.assert_no_active_commit()
         result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % backing_img)
         self.assert_qmp(result, 'error/class', 'GenericError')
-        self.assert_qmp(result, 'error/desc', 'Invalid files for merge: top and base are the same')
+        self.assert_qmp(result, 'error/desc', 'Base \'%s\' not found' % backing_img)
 
     def test_top_invalid(self):
         self.assert_no_active_commit()
@@ -135,7 +135,7 @@ class TestSingleDrive(ImageCommitTestCase):
         self.assert_no_active_commit()
         result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % mid_img)
         self.assert_qmp(result, 'error/class', 'GenericError')
-        self.assert_qmp(result, 'error/desc', 'Base (%(1)s) is not reachable from top (%(2)s)' % {"1" : mid_img, "2" : backing_img})
+        self.assert_qmp(result, 'error/desc', 'Base \'%s\' not found' % mid_img)
 
     def test_top_omitted(self):
         self.assert_no_active_commit()
commit b1b1d783eabdb6ac4e4578b2c04b0c24483dce77
Author: Jeff Cody <jcody at redhat.com>
Date:   Tue Oct 16 15:49:09 2012 -0400

    block: make bdrv_find_backing_image compare canonical filenames
    
    Currently, bdrv_find_backing_image compares bs->backing_file with
    what is passed in as a backing_file name.  Mismatches may occur,
    however, when bs->backing_file and backing_file are not both
    absolute or relative.
    
    Use path_combine() to make sure any relative backing filenames are
    relative to the current image filename being searched, and then use
    realpath() to make all comparisons based on absolute filenames.
    
    If either backing_file or bs->backing_file is determine to be a
    protocol, then no filename normalization is performed.
    
    This also changes bdrv_find_backing_image to no longer be recursive,
    but iterative.
    
    Signed-off-by: Jeff Cody <jcody at redhat.com>
    Reviewed-by: Eric Blake <eblake at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/block.c b/block.c
index e95f613..5e7fc9e 100644
--- a/block.c
+++ b/block.c
@@ -3123,22 +3123,70 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs,
     return -ENOTSUP;
 }
 
+/* backing_file can either be relative, or absolute, or a protocol.  If it is
+ * relative, it must be relative to the chain.  So, passing in bs->filename
+ * from a BDS as backing_file should not be done, as that may be relative to
+ * the CWD rather than the chain. */
 BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
         const char *backing_file)
 {
-    if (!bs->drv) {
+    char *filename_full = NULL;
+    char *backing_file_full = NULL;
+    char *filename_tmp = NULL;
+    int is_protocol = 0;
+    BlockDriverState *curr_bs = NULL;
+    BlockDriverState *retval = NULL;
+
+    if (!bs || !bs->drv || !backing_file) {
         return NULL;
     }
 
-    if (bs->backing_hd) {
-        if (strcmp(bs->backing_file, backing_file) == 0) {
-            return bs->backing_hd;
+    filename_full     = g_malloc(PATH_MAX);
+    backing_file_full = g_malloc(PATH_MAX);
+    filename_tmp      = g_malloc(PATH_MAX);
+
+    is_protocol = path_has_protocol(backing_file);
+
+    for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
+
+        /* If either of the filename paths is actually a protocol, then
+         * compare unmodified paths; otherwise make paths relative */
+        if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
+            if (strcmp(backing_file, curr_bs->backing_file) == 0) {
+                retval = curr_bs->backing_hd;
+                break;
+            }
         } else {
-            return bdrv_find_backing_image(bs->backing_hd, backing_file);
+            /* If not an absolute filename path, make it relative to the current
+             * image's filename path */
+            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
+                         backing_file);
+
+            /* We are going to compare absolute pathnames */
+            if (!realpath(filename_tmp, filename_full)) {
+                continue;
+            }
+
+            /* We need to make sure the backing filename we are comparing against
+             * is relative to the current image filename (or absolute) */
+            path_combine(filename_tmp, PATH_MAX, curr_bs->filename,
+                         curr_bs->backing_file);
+
+            if (!realpath(filename_tmp, backing_file_full)) {
+                continue;
+            }
+
+            if (strcmp(backing_file_full, filename_full) == 0) {
+                retval = curr_bs->backing_hd;
+                break;
+            }
         }
     }
 
-    return NULL;
+    g_free(filename_full);
+    g_free(backing_file_full);
+    g_free(filename_tmp);
+    return retval;
 }
 
 int bdrv_get_backing_file_depth(BlockDriverState *bs)
commit a616673dd1c2e00db5e3458d2ba4b6619b78876a
Author: Alex Bligh <alex at alex.org.uk>
Date:   Tue Oct 16 13:46:18 2012 +0100

    qemu-img rebase: use empty string to rebase without backing file
    
    This patch allows an empty filename to be passed as the new base image name
    for qemu-img rebase to mean base the image on no backing file (i.e.
    independent of any backing file). According to Eric Blake, qemu-img rebase
    already supports this when '-u' is used; this adds support when -u is not
    used.
    
    Signed-off-by: Alex Bligh <alex at alex.org.uk>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/qemu-img.c b/qemu-img.c
index 849eb41..c092ccf 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -1562,13 +1562,15 @@ static int img_rebase(int argc, char **argv)
             error_report("Could not open old backing file '%s'", backing_name);
             goto out;
         }
-
-        bs_new_backing = bdrv_new("new_backing");
-        ret = bdrv_open(bs_new_backing, out_baseimg, BDRV_O_FLAGS,
+        if (out_baseimg[0]) {
+            bs_new_backing = bdrv_new("new_backing");
+            ret = bdrv_open(bs_new_backing, out_baseimg, BDRV_O_FLAGS,
                         new_backing_drv);
-        if (ret) {
-            error_report("Could not open new backing file '%s'", out_baseimg);
-            goto out;
+            if (ret) {
+                error_report("Could not open new backing file '%s'",
+                             out_baseimg);
+                goto out;
+            }
         }
     }
 
@@ -1584,7 +1586,7 @@ static int img_rebase(int argc, char **argv)
     if (!unsafe) {
         uint64_t num_sectors;
         uint64_t old_backing_num_sectors;
-        uint64_t new_backing_num_sectors;
+        uint64_t new_backing_num_sectors = 0;
         uint64_t sector;
         int n;
         uint8_t * buf_old;
@@ -1596,7 +1598,9 @@ static int img_rebase(int argc, char **argv)
 
         bdrv_get_geometry(bs, &num_sectors);
         bdrv_get_geometry(bs_old_backing, &old_backing_num_sectors);
-        bdrv_get_geometry(bs_new_backing, &new_backing_num_sectors);
+        if (bs_new_backing) {
+            bdrv_get_geometry(bs_new_backing, &new_backing_num_sectors);
+        }
 
         if (num_sectors != 0) {
             local_progress = (float)100 /
@@ -1636,7 +1640,7 @@ static int img_rebase(int argc, char **argv)
                 }
             }
 
-            if (sector >= new_backing_num_sectors) {
+            if (sector >= new_backing_num_sectors || !bs_new_backing) {
                 memset(buf_new, 0, n * BDRV_SECTOR_SIZE);
             } else {
                 if (sector + n > new_backing_num_sectors) {
@@ -1682,7 +1686,12 @@ static int img_rebase(int argc, char **argv)
      * backing file are overwritten in the COW file now, so the visible content
      * doesn't change when we switch the backing file.
      */
-    ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt);
+    if (out_baseimg && *out_baseimg) {
+        ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt);
+    } else {
+        ret = bdrv_change_backing_file(bs, NULL, NULL);
+    }
+
     if (ret == -ENOSPC) {
         error_report("Could not change the backing file to '%s': No "
                      "space left in the file header", out_baseimg);
diff --git a/qemu-img.texi b/qemu-img.texi
index 8b05f2c..42ec392 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -148,7 +148,9 @@ Changes the backing file of an image. Only the formats @code{qcow2} and
 
 The backing file is changed to @var{backing_file} and (if the image format of
 @var{filename} supports this) the backing file format is changed to
- at var{backing_fmt}.
+ at var{backing_fmt}. If @var{backing_file} is specified as ``'' (the empty
+string), then the image is rebased onto no backing file (i.e. it will exist
+independently of any backing file).
 
 There are two different modes in which @code{rebase} can operate:
 @table @option
commit b3d0380ec245d73e5233366f541497ef92b2e283
Author: Jeff Cody <jcody at redhat.com>
Date:   Mon Oct 15 16:58:02 2012 -0400

    qmp: fix __accept() in qmp.py
    
    In QEMUMonitorProtocol, commit e9d17b6 removed the __sockfile creation
    from __negotiate_capabilities(), which breaks _accept().  This causes
    failures in qemu-io python based tests (i.e. tests 030 and 040).
    
    This patch creates the sockfile in __accept() as well.
    
    Signed-off-by: Jeff Cody <jcody at redhat.com>
    Reviewed-by: Stefan Hajnoczi <stefanha at redhat.com>
    Acked-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>

diff --git a/QMP/qmp.py b/QMP/qmp.py
index 33c7d36..32510a1 100644
--- a/QMP/qmp.py
+++ b/QMP/qmp.py
@@ -96,6 +96,7 @@ class QEMUMonitorProtocol:
         @raise QMPCapabilitiesError if fails to negotiate capabilities
         """
         self.__sock, _ = self.__sock.accept()
+        self.__sockfile = self.__sock.makefile()
         return self.__negotiate_capabilities()
 
     def cmd_obj(self, qmp_cmd):
commit ee17f9d5fd44fba950ab2290dd30f38d1cd2b625
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Fri Oct 12 14:24:42 2012 +0200

    qemu-iotests: Test qemu-img operation on zero size image
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/tests/qemu-iotests/042 b/tests/qemu-iotests/042
new file mode 100755
index 0000000..c3c3ca8
--- /dev/null
+++ b/tests/qemu-iotests/042
@@ -0,0 +1,78 @@
+#!/bin/bash
+#
+# Test qemu-img operation on zero size images
+#
+# Copyright (C) 2012 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+# creator
+owner=kwolf at redhat.com
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1	# failure is the default!
+
+_cleanup()
+{
+	_cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2 qcow qed vmdk
+_supported_proto file
+_supported_os Linux
+
+echo
+echo "== Creating zero size image =="
+
+_make_test_img 0
+_check_test_img
+
+mv $TEST_IMG $TEST_IMG.orig
+
+echo
+echo "== Converting the image =="
+
+$QEMU_IMG convert -O $IMGFMT $TEST_IMG.orig $TEST_IMG
+_check_test_img
+
+echo
+echo "== Converting the image, compressed =="
+
+if [ "$IMGFMT" == "qcow2" ]; then
+    $QEMU_IMG convert -c -O $IMGFMT $TEST_IMG.orig $TEST_IMG
+fi
+_check_test_img
+
+echo
+echo "== Rebasing the image =="
+
+$QEMU_IMG rebase -u -b $TEST_IMG.orig $TEST_IMG
+$QEMU_IMG rebase -b $TEST_IMG.orig $TEST_IMG
+_check_test_img
+
+# success, all done
+echo "*** done"
+rm -f $seq.full
+status=0
+
diff --git a/tests/qemu-iotests/042.out b/tests/qemu-iotests/042.out
new file mode 100644
index 0000000..dc80f4b
--- /dev/null
+++ b/tests/qemu-iotests/042.out
@@ -0,0 +1,15 @@
+QA output created by 042
+
+== Creating zero size image ==
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=0 
+No errors were found on the image.
+
+== Converting the image ==
+No errors were found on the image.
+
+== Converting the image, compressed ==
+No errors were found on the image.
+
+== Rebasing the image ==
+No errors were found on the image.
+*** done
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 66d2ba9..d5f9ab0 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -47,3 +47,4 @@
 038 rw auto backing
 039 rw auto
 040 rw auto
+042 rw auto quick
commit 1f71049523f4fc0738f96c74bfcce012521fa0f0
Author: Kevin Wolf <kwolf at redhat.com>
Date:   Fri Oct 12 14:29:18 2012 +0200

    qemu-img: Fix division by zero for zero size images
    
    Signed-off-by: Kevin Wolf <kwolf at redhat.com>
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/qemu-img.c b/qemu-img.c
index f17f187..849eb41 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -674,7 +674,7 @@ static int img_convert(int argc, char **argv)
     QEMUOptionParameter *out_baseimg_param;
     char *options = NULL;
     const char *snapshot_name = NULL;
-    float local_progress;
+    float local_progress = 0;
     int min_sparse = 8; /* Need at least 4k of zeros for sparse detection */
 
     fmt = NULL;
@@ -914,8 +914,10 @@ static int img_convert(int argc, char **argv)
         sector_num = 0;
 
         nb_sectors = total_sectors;
-        local_progress = (float)100 /
-            (nb_sectors / MIN(nb_sectors, cluster_sectors));
+        if (nb_sectors != 0) {
+            local_progress = (float)100 /
+                (nb_sectors / MIN(nb_sectors, cluster_sectors));
+        }
 
         for(;;) {
             int64_t bs_num;
@@ -986,8 +988,10 @@ static int img_convert(int argc, char **argv)
 
         sector_num = 0; // total number of sectors converted so far
         nb_sectors = total_sectors - sector_num;
-        local_progress = (float)100 /
-            (nb_sectors / MIN(nb_sectors, IO_BUF_SIZE / 512));
+        if (nb_sectors != 0) {
+            local_progress = (float)100 /
+                (nb_sectors / MIN(nb_sectors, IO_BUF_SIZE / 512));
+        }
 
         for(;;) {
             nb_sectors = total_sectors - sector_num;
@@ -1585,7 +1589,7 @@ static int img_rebase(int argc, char **argv)
         int n;
         uint8_t * buf_old;
         uint8_t * buf_new;
-        float local_progress;
+        float local_progress = 0;
 
         buf_old = qemu_blockalign(bs, IO_BUF_SIZE);
         buf_new = qemu_blockalign(bs, IO_BUF_SIZE);
@@ -1594,8 +1598,11 @@ static int img_rebase(int argc, char **argv)
         bdrv_get_geometry(bs_old_backing, &old_backing_num_sectors);
         bdrv_get_geometry(bs_new_backing, &new_backing_num_sectors);
 
-        local_progress = (float)100 /
-            (num_sectors / MIN(num_sectors, IO_BUF_SIZE / 512));
+        if (num_sectors != 0) {
+            local_progress = (float)100 /
+                (num_sectors / MIN(num_sectors, IO_BUF_SIZE / 512));
+        }
+
         for (sector = 0; sector < num_sectors; sector += n) {
 
             /* How many sectors can we handle with the next read? */
commit 6dd844db4a49a367cc15cd0e8bfb72cfc6652766
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Aug 22 16:43:07 2012 +0200

    qmp: add NBD server commands
    
    Adding an NBD server inside QEMU is trivial, since all the logic is
    in nbd.c and can be shared easily between qemu-nbd and QEMU itself.
    The main difference is that qemu-nbd serves a single unnamed export,
    while QEMU serves named exports.
    
    Acked-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/Makefile.objs b/Makefile.objs
index ca67885..9eca179 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -61,7 +61,7 @@ endif
 # suppress *all* target specific code in case of system emulation, i.e. a
 # single QEMU executable should support all CPUs and machines.
 
-common-obj-y = $(block-obj-y) blockdev.o block/
+common-obj-y = $(block-obj-y) blockdev.o blockdev-nbd.o block/
 common-obj-y += net.o net/
 common-obj-y += qom/
 common-obj-y += readline.o console.o cursor.o
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
new file mode 100644
index 0000000..8031813
--- /dev/null
+++ b/blockdev-nbd.c
@@ -0,0 +1,119 @@
+/*
+ * Serving QEMU block devices via NBD
+ *
+ * Copyright (c) 2012 Red Hat, Inc.
+ *
+ * Author: Paolo Bonzini <pbonzini at redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "blockdev.h"
+#include "hw/block-common.h"
+#include "monitor.h"
+#include "qerror.h"
+#include "sysemu.h"
+#include "qmp-commands.h"
+#include "trace.h"
+#include "nbd.h"
+#include "qemu_socket.h"
+
+static int server_fd = -1;
+
+static void nbd_accept(void *opaque)
+{
+    struct sockaddr_in addr;
+    socklen_t addr_len = sizeof(addr);
+
+    int fd = accept(server_fd, (struct sockaddr *)&addr, &addr_len);
+    if (fd >= 0) {
+        nbd_client_new(NULL, fd, nbd_client_put);
+    }
+}
+
+void qmp_nbd_server_start(SocketAddress *addr, Error **errp)
+{
+    if (server_fd != -1) {
+        error_setg(errp, "NBD server already running");
+        return;
+    }
+
+    server_fd = socket_listen(addr, errp);
+    if (server_fd != -1) {
+        qemu_set_fd_handler2(server_fd, NULL, nbd_accept, NULL, NULL);
+    }
+}
+
+/* Hook into the BlockDriverState notifiers to close the export when
+ * the file is closed.
+ */
+typedef struct NBDCloseNotifier {
+    Notifier n;
+    NBDExport *exp;
+    QTAILQ_ENTRY(NBDCloseNotifier) next;
+} NBDCloseNotifier;
+
+static QTAILQ_HEAD(, NBDCloseNotifier) close_notifiers =
+    QTAILQ_HEAD_INITIALIZER(close_notifiers);
+
+static void nbd_close_notifier(Notifier *n, void *data)
+{
+    NBDCloseNotifier *cn = DO_UPCAST(NBDCloseNotifier, n, n);
+
+    notifier_remove(&cn->n);
+    QTAILQ_REMOVE(&close_notifiers, cn, next);
+
+    nbd_export_close(cn->exp);
+    nbd_export_put(cn->exp);
+    g_free(cn);
+}
+
+static void nbd_server_put_ref(NBDExport *exp)
+{
+    BlockDriverState *bs = nbd_export_get_blockdev(exp);
+    drive_put_ref(drive_get_by_blockdev(bs));
+}
+
+void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
+                        Error **errp)
+{
+    BlockDriverState *bs;
+    NBDExport *exp;
+    NBDCloseNotifier *n;
+
+    if (nbd_export_find(device)) {
+        error_setg(errp, "NBD server already exporting device '%s'", device);
+        return;
+    }
+
+    bs = bdrv_find(device);
+    if (!bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+        return;
+    }
+
+    exp = nbd_export_new(bs, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY,
+                         nbd_server_put_ref);
+
+    nbd_export_set_name(exp, device);
+    drive_get_ref(drive_get_by_blockdev(bs));
+
+    n = g_malloc0(sizeof(NBDCloseNotifier));
+    n->n.notify = nbd_close_notifier;
+    n->exp = exp;
+    bdrv_add_close_notifier(bs, &n->n);
+    QTAILQ_INSERT_TAIL(&close_notifiers, n, next);
+}
+
+void qmp_nbd_server_stop(Error **errp)
+{
+    while (!QTAILQ_EMPTY(&close_notifiers)) {
+        NBDCloseNotifier *cn = QTAILQ_FIRST(&close_notifiers);
+        nbd_close_notifier(&cn->n, nbd_export_get_blockdev(cn->exp));
+    }
+
+    qemu_set_fd_handler2(server_fd, NULL, NULL, NULL, NULL);
+    close(server_fd);
+    server_fd = -1;
+}
diff --git a/qapi-schema.json b/qapi-schema.json
index 8468567..6fd263e 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2863,3 +2863,46 @@
 # Since: 0.14.0
 ##
 { 'command': 'screendump', 'data': {'filename': 'str'} }
+
+##
+# @nbd-server-start:
+#
+# Start an NBD server listening on the given host and port.  Block
+# devices can then be exported using @nbd-server-add.  The NBD
+# server will present them as named exports; for example, another
+# QEMU instance could refer to them as "nbd:HOST:PORT:exportname=NAME".
+#
+# @addr: Address on which to listen.
+#
+# Returns: error if the server is already running.
+#
+# Since: 1.3.0
+##
+{ 'command': 'nbd-server-start',
+  'data': { 'addr': 'SocketAddress' } }
+
+##
+# @nbd-server-add:
+#
+# Export a device to QEMU's embedded NBD server.
+#
+# @device: Block device to be exported
+#
+# @writable: Whether clients should be able to write to the device via the
+#     NBD connection (default false). #optional
+#
+# Returns: error if the device is already marked for export.
+#
+# Since: 1.3.0
+##
+{ 'command': 'nbd-server-add', 'data': {'device': 'str', '*writable': 'bool'} }
+
+##
+# @nbd-server-stop:
+#
+# Stop QEMU's embedded NBD server, and unregister all devices previously
+# added via @nbd-server-add.
+#
+# Since: 1.3.0
+##
+{ 'command': 'nbd-server-stop' }
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 5ba8c48..ebe9a78 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -2552,6 +2552,22 @@ EQMP
     },
 
     {
+        .name       = "nbd-server-start",
+        .args_type  = "addr:q",
+        .mhandler.cmd_new = qmp_marshal_input_nbd_server_start,
+    },
+    {
+        .name       = "nbd-server-add",
+        .args_type  = "device:B,writable:b?",
+        .mhandler.cmd_new = qmp_marshal_input_nbd_server_add,
+    },
+    {
+        .name       = "nbd-server-stop",
+        .args_type  = "",
+        .mhandler.cmd_new = qmp_marshal_input_nbd_server_stop,
+    },
+
+    {
         .name       = "change-vnc-password",
         .args_type  = "password:s",
         .mhandler.cmd_new = qmp_marshal_input_change_vnc_password,
commit d7d512f60979681c27597f1b1277e03505c1de08
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Aug 23 11:20:36 2012 +0200

    block: add close notifiers
    
    The first user of close notifiers will be the embedded NBD server.
    It would be possible to use them to do some of the ad hoc processing
    (e.g. for block jobs and I/O limits) that is currently done by
    bdrv_close.
    
    Acked-by: Kevin Wolf <kwolf at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/Makefile.objs b/Makefile.objs
index 3f16d67..ca67885 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -43,7 +43,7 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o
 
 block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o
 block-obj-y += nbd.o block.o blockjob.o aio.o aes.o qemu-config.o
-block-obj-y += qemu-progress.o qemu-sockets.o uri.o
+block-obj-y += qemu-progress.o qemu-sockets.o uri.o notify.o
 block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
 block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
@@ -94,7 +94,7 @@ common-obj-y += bt-host.o bt-vhci.o
 common-obj-y += dma-helpers.o
 common-obj-y += iov.o acl.o
 common-obj-$(CONFIG_POSIX) += compatfd.o
-common-obj-y += notify.o event_notifier.o
+common-obj-y += event_notifier.o
 common-obj-y += qemu-timer.o qemu-timer-common.o
 common-obj-y += qtest.o
 common-obj-y += vl.o
diff --git a/block.c b/block.c
index 2e4ddea..56426a9 100644
--- a/block.c
+++ b/block.c
@@ -30,6 +30,7 @@
 #include "module.h"
 #include "qjson.h"
 #include "sysemu.h"
+#include "notify.h"
 #include "qemu-coroutine.h"
 #include "qmp-commands.h"
 #include "qemu-timer.h"
@@ -312,9 +313,16 @@ BlockDriverState *bdrv_new(const char *device_name)
         QTAILQ_INSERT_TAIL(&bdrv_states, bs, list);
     }
     bdrv_iostatus_disable(bs);
+    notifier_list_init(&bs->close_notifiers);
+
     return bs;
 }
 
+void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
+{
+    notifier_list_add(&bs->close_notifiers, notify);
+}
+
 BlockDriver *bdrv_find_format(const char *format_name)
 {
     BlockDriver *drv1;
@@ -1102,6 +1110,7 @@ void bdrv_close(BlockDriverState *bs)
         block_job_cancel_sync(bs->job);
     }
     bdrv_drain_all();
+    notifier_list_notify(&bs->close_notifiers, bs);
 
     if (bs->drv) {
         if (bs == bs_snapshots) {
diff --git a/block.h b/block.h
index e2d89d7..aa608a8 100644
--- a/block.h
+++ b/block.h
@@ -144,6 +144,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
 void bdrv_reopen_commit(BDRVReopenState *reopen_state);
 void bdrv_reopen_abort(BDRVReopenState *reopen_state);
 void bdrv_close(BlockDriverState *bs);
+void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify);
 int bdrv_attach_dev(BlockDriverState *bs, void *dev);
 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev);
 void bdrv_detach_dev(BlockDriverState *bs, void *dev);
diff --git a/block_int.h b/block_int.h
index f4bae04..cedabbd 100644
--- a/block_int.h
+++ b/block_int.h
@@ -233,6 +233,8 @@ struct BlockDriverState {
     BlockDriverState *backing_hd;
     BlockDriverState *file;
 
+    NotifierList close_notifiers;
+
     /* number of in-flight copy-on-read requests */
     unsigned int copy_on_read_in_flight;
 
commit 3cbc002c34aa85ea952ee9b169a3ff97d350516a
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Oct 19 11:36:48 2012 +0200

    block: prepare code for adding block notifiers
    
    There is no reason in principle to skip job cancellation and draining
    of pending I/O when there is no medium in the disk.  Do these unconditionally,
    which also prepares the code for the next patch.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/block.c b/block.c
index e95f613..2e4ddea 100644
--- a/block.c
+++ b/block.c
@@ -1098,12 +1098,12 @@ void bdrv_reopen_abort(BDRVReopenState *reopen_state)
 void bdrv_close(BlockDriverState *bs)
 {
     bdrv_flush(bs);
-    if (bs->drv) {
-        if (bs->job) {
-            block_job_cancel_sync(bs->job);
-        }
-        bdrv_drain_all();
+    if (bs->job) {
+        block_job_cancel_sync(bs->job);
+    }
+    bdrv_drain_all();
 
+    if (bs->drv) {
         if (bs == bs_snapshots) {
             bs_snapshots = NULL;
         }
commit 101f9cbc2b73340197ec610c095fd69f0b1413ed
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Oct 23 21:31:53 2012 +0200

    qemu-sockets: add socket_listen, socket_connect, socket_parse
    
    These are QAPI-friendly versions of the qemu-sockets functions.  They
    support IP sockets, Unix sockets, and named file descriptors, using a
    QAPI union to dispatch to the correct function.
    
    Reviewed-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/Makefile b/Makefile
index 9f599bc..17e2d58 100644
--- a/Makefile
+++ b/Makefile
@@ -161,7 +161,7 @@ qemu-img.o: qemu-img-cmds.h
 
 tools-obj-y = $(oslib-obj-y) $(trace-obj-y) qemu-tool.o qemu-timer.o \
 	qemu-timer-common.o main-loop.o notify.o \
-	iohandler.o cutils.o iov.o async.o
+	iohandler.o cutils.o iov.o async.o error.o
 tools-obj-$(CONFIG_POSIX) += compatfd.o
 
 qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y)
diff --git a/qemu-sockets.c b/qemu-sockets.c
index 909c65f..cfed9c5 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -22,6 +22,7 @@
 #include <errno.h>
 #include <unistd.h>
 
+#include "monitor.h"
 #include "qemu_socket.h"
 #include "qemu-common.h" /* for qemu_isdigit */
 #include "main-loop.h"
@@ -845,6 +846,104 @@ int unix_nonblocking_connect(const char *path,
     return sock;
 }
 
+SocketAddress *socket_parse(const char *str, Error **errp)
+{
+    SocketAddress *addr = NULL;
+
+    addr = g_new(SocketAddress, 1);
+    if (strstart(str, "unix:", NULL)) {
+        if (str[5] == '\0') {
+            error_setg(errp, "invalid Unix socket address\n");
+            goto fail;
+        } else {
+            addr->kind = SOCKET_ADDRESS_KIND_UNIX;
+            addr->q_unix = g_new(UnixSocketAddress, 1);
+            addr->q_unix->path = g_strdup(str + 5);
+        }
+    } else if (strstart(str, "fd:", NULL)) {
+        if (str[3] == '\0') {
+            error_setg(errp, "invalid file descriptor address\n");
+            goto fail;
+        } else {
+            addr->kind = SOCKET_ADDRESS_KIND_FD;
+            addr->fd = g_new(String, 1);
+            addr->fd->str = g_strdup(str + 3);
+        }
+    } else {
+        addr->kind = SOCKET_ADDRESS_KIND_INET;
+        addr->inet = g_new(InetSocketAddress, 1);
+        addr->inet = inet_parse(str, errp);
+        if (addr->inet == NULL) {
+            goto fail;
+        }
+    }
+    return addr;
+
+fail:
+    qapi_free_SocketAddress(addr);
+    return NULL;
+}
+
+int socket_connect(SocketAddress *addr, Error **errp,
+                   NonBlockingConnectHandler *callback, void *opaque)
+{
+    QemuOpts *opts;
+    int fd;
+
+    opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+    switch (addr->kind) {
+    case SOCKET_ADDRESS_KIND_INET:
+        inet_addr_to_opts(opts, addr->inet);
+        fd = inet_connect_opts(opts, errp, callback, opaque);
+        break;
+
+    case SOCKET_ADDRESS_KIND_UNIX:
+        qemu_opt_set(opts, "path", addr->q_unix->path);
+        fd = unix_connect_opts(opts, errp, callback, opaque);
+        break;
+
+    case SOCKET_ADDRESS_KIND_FD:
+        fd = monitor_get_fd(cur_mon, addr->fd->str, errp);
+        if (callback) {
+            callback(fd, opaque);
+        }
+        break;
+
+    default:
+        abort();
+    }
+    qemu_opts_del(opts);
+    return fd;
+}
+
+int socket_listen(SocketAddress *addr, Error **errp)
+{
+    QemuOpts *opts;
+    int fd;
+
+    opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+    switch (addr->kind) {
+    case SOCKET_ADDRESS_KIND_INET:
+        inet_addr_to_opts(opts, addr->inet);
+        fd = inet_listen_opts(opts, 0, errp);
+        break;
+
+    case SOCKET_ADDRESS_KIND_UNIX:
+        qemu_opt_set(opts, "path", addr->q_unix->path);
+        fd = unix_listen_opts(opts, errp);
+        break;
+
+    case SOCKET_ADDRESS_KIND_FD:
+        fd = monitor_get_fd(cur_mon, addr->fd->str, errp);
+        break;
+
+    default:
+        abort();
+    }
+    qemu_opts_del(opts);
+    return fd;
+}
+
 #ifdef _WIN32
 static void socket_cleanup(void)
 {
diff --git a/qemu-tool.c b/qemu-tool.c
index f2f9813..da4c05a 100644
--- a/qemu-tool.c
+++ b/qemu-tool.c
@@ -38,6 +38,12 @@ const char *qemu_get_vm_name(void)
 
 Monitor *cur_mon;
 
+int monitor_get_fd(Monitor *mon, const char *name, Error **errp)
+{
+    error_setg(errp, "only QEMU supports file descriptor passing");
+    return -1;
+}
+
 void vm_stop(RunState state)
 {
     abort();
diff --git a/qemu_socket.h b/qemu_socket.h
index 89a5feb..02490ad 100644
--- a/qemu_socket.h
+++ b/qemu_socket.h
@@ -65,6 +65,11 @@ int unix_nonblocking_connect(const char *str,
                              NonBlockingConnectHandler *callback,
                              void *opaque, Error **errp);
 
+SocketAddress *socket_parse(const char *str, Error **errp);
+int socket_connect(SocketAddress *addr, Error **errp,
+                   NonBlockingConnectHandler *callback, void *opaque);
+int socket_listen(SocketAddress *addr, Error **errp);
+
 /* Old, ipv4 only bits.  Don't use for new code. */
 int parse_host_port(struct sockaddr_in *saddr, const char *str);
 int socket_init(void);
commit 0ef3dd6c2de1fa4f80c5aa71fb5fdada0f35cc9a
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Oct 23 22:36:08 2012 +0200

    tests: do not include tools-obj-y
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/tests/Makefile b/tests/Makefile
index 26a67ce..86c9b79 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -42,11 +42,11 @@ test-qapi-obj-y += module.o
 
 $(test-obj-y): QEMU_INCLUDES += -Itests
 
-tests/check-qint$(EXESUF): tests/check-qint.o qint.o $(tools-obj-y)
-tests/check-qstring$(EXESUF): tests/check-qstring.o qstring.o $(tools-obj-y)
-tests/check-qdict$(EXESUF): tests/check-qdict.o qdict.o qfloat.o qint.o qstring.o qbool.o qlist.o $(tools-obj-y)
-tests/check-qlist$(EXESUF): tests/check-qlist.o qlist.o qint.o $(tools-obj-y)
-tests/check-qfloat$(EXESUF): tests/check-qfloat.o qfloat.o $(tools-obj-y)
+tests/check-qint$(EXESUF): tests/check-qint.o qint.o
+tests/check-qstring$(EXESUF): tests/check-qstring.o qstring.o
+tests/check-qdict$(EXESUF): tests/check-qdict.o qdict.o qfloat.o qint.o qstring.o qbool.o qlist.o
+tests/check-qlist$(EXESUF): tests/check-qlist.o qlist.o qint.o
+tests/check-qfloat$(EXESUF): tests/check-qfloat.o qfloat.o
 tests/check-qjson$(EXESUF): tests/check-qjson.o $(qobject-obj-y) $(tools-obj-y)
 tests/test-coroutine$(EXESUF): tests/test-coroutine.o $(coroutine-obj-y) $(tools-obj-y)
 tests/test-iov$(EXESUF): tests/test-iov.o iov.o
commit 879e45c72da1569e07fbbc6a1aa2a708ea796044
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 19 13:51:46 2012 +0200

    qemu-sockets: return InetSocketAddress from inet_parse
    
    Reviewed-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/qemu-sockets.c b/qemu-sockets.c
index daff8e6..909c65f 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -480,54 +480,91 @@ err:
 }
 
 /* compatibility wrapper */
-static void inet_parse(QemuOpts *opts, const char *str, Error **errp)
+static InetSocketAddress *inet_parse(const char *str, Error **errp)
 {
+    InetSocketAddress *addr;
     const char *optstr, *h;
-    char addr[64];
+    char host[64];
     char port[33];
+    int to;
     int pos;
 
+    addr = g_new0(InetSocketAddress, 1);
+
     /* parse address */
     if (str[0] == ':') {
         /* no host given */
-        addr[0] = '\0';
-        if (1 != sscanf(str,":%32[^,]%n",port,&pos)) {
+        host[0] = '\0';
+        if (1 != sscanf(str, ":%32[^,]%n", port, &pos)) {
             error_setg(errp, "error parsing port in address '%s'", str);
-            return;
+            goto fail;
         }
     } else if (str[0] == '[') {
         /* IPv6 addr */
-        if (2 != sscanf(str,"[%64[^]]]:%32[^,]%n",addr,port,&pos)) {
+        if (2 != sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos)) {
             error_setg(errp, "error parsing IPv6 address '%s'", str);
-            return;
+            goto fail;
         }
-        qemu_opt_set(opts, "ipv6", "on");
+        addr->ipv6 = addr->has_ipv6 = true;
     } else if (qemu_isdigit(str[0])) {
         /* IPv4 addr */
-        if (2 != sscanf(str,"%64[0-9.]:%32[^,]%n",addr,port,&pos)) {
+        if (2 != sscanf(str, "%64[0-9.]:%32[^,]%n", host, port, &pos)) {
             error_setg(errp, "error parsing IPv4 address '%s'", str);
-            return;
+            goto fail;
         }
-        qemu_opt_set(opts, "ipv4", "on");
+        addr->ipv4 = addr->has_ipv4 = true;
     } else {
         /* hostname */
-        if (2 != sscanf(str,"%64[^:]:%32[^,]%n",addr,port,&pos)) {
+        if (2 != sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos)) {
             error_setg(errp, "error parsing address '%s'", str);
-            return;
+            goto fail;
         }
     }
-    qemu_opt_set(opts, "host", addr);
-    qemu_opt_set(opts, "port", port);
+
+    addr->host = g_strdup(host);
+    addr->port = g_strdup(port);
 
     /* parse options */
     optstr = str + pos;
     h = strstr(optstr, ",to=");
-    if (h)
-        qemu_opt_set(opts, "to", h+4);
-    if (strstr(optstr, ",ipv4"))
-        qemu_opt_set(opts, "ipv4", "on");
-    if (strstr(optstr, ",ipv6"))
-        qemu_opt_set(opts, "ipv6", "on");
+    if (h) {
+        if (1 != sscanf(str, "%d%n", &to, &pos) ||
+            (str[pos] != '\0' && str[pos] != ',')) {
+            error_setg(errp, "error parsing to= argument");
+            goto fail;
+        }
+        addr->has_to = true;
+        addr->to = to;
+    }
+    if (strstr(optstr, ",ipv4")) {
+        addr->ipv4 = addr->has_ipv4 = true;
+    }
+    if (strstr(optstr, ",ipv6")) {
+        addr->ipv6 = addr->has_ipv6 = true;
+    }
+    return addr;
+
+fail:
+    qapi_free_InetSocketAddress(addr);
+    return NULL;
+}
+
+static void inet_addr_to_opts(QemuOpts *opts, InetSocketAddress *addr)
+{
+    bool ipv4 = addr->ipv4 || !addr->has_ipv4;
+    bool ipv6 = addr->ipv6 || !addr->has_ipv6;
+
+    if (!ipv4 || !ipv6) {
+        qemu_opt_set_bool(opts, "ipv4", ipv4);
+        qemu_opt_set_bool(opts, "ipv6", ipv6);
+    }
+    if (addr->has_to) {
+        char to[20];
+        snprintf(to, sizeof(to), "%d", addr->to);
+        qemu_opt_set(opts, "to", to);
+    }
+    qemu_opt_set(opts, "host", addr->host);
+    qemu_opt_set(opts, "port", addr->port);
 }
 
 int inet_listen(const char *str, char *ostr, int olen,
@@ -536,11 +573,13 @@ int inet_listen(const char *str, char *ostr, int olen,
     QemuOpts *opts;
     char *optstr;
     int sock = -1;
-    Error *local_err = NULL;
+    InetSocketAddress *addr;
 
-    opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
-    inet_parse(opts, str, &local_err);
-    if (local_err == NULL) {
+    addr = inet_parse(str, errp);
+    if (addr != NULL) {
+        opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+        inet_addr_to_opts(opts, addr);
+        qapi_free_InetSocketAddress(addr);
         sock = inet_listen_opts(opts, port_offset, errp);
         if (sock != -1 && ostr) {
             optstr = strchr(str, ',');
@@ -556,10 +595,8 @@ int inet_listen(const char *str, char *ostr, int olen,
                          optstr ? optstr : "");
             }
         }
-    } else {
-        error_propagate(errp, local_err);
+        qemu_opts_del(opts);
     }
-    qemu_opts_del(opts);
     return sock;
 }
 
@@ -575,16 +612,16 @@ int inet_connect(const char *str, Error **errp)
 {
     QemuOpts *opts;
     int sock = -1;
-    Error *local_err = NULL;
+    InetSocketAddress *addr;
 
-    opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
-    inet_parse(opts, str, &local_err);
-    if (local_err == NULL) {
+    addr = inet_parse(str, errp);
+    if (addr != NULL) {
+        opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+        inet_addr_to_opts(opts, addr);
+        qapi_free_InetSocketAddress(addr);
         sock = inet_connect_opts(opts, errp, NULL, NULL);
-    } else {
-        error_propagate(errp, local_err);
+        qemu_opts_del(opts);
     }
-    qemu_opts_del(opts);
     return sock;
 }
 
@@ -607,18 +644,18 @@ int inet_nonblocking_connect(const char *str,
 {
     QemuOpts *opts;
     int sock = -1;
-    Error *local_err = NULL;
+    InetSocketAddress *addr;
 
     g_assert(callback != NULL);
 
-    opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
-    inet_parse(opts, str, &local_err);
-    if (local_err == NULL) {
+    addr = inet_parse(str, errp);
+    if (addr != NULL) {
+        opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+        inet_addr_to_opts(opts, addr);
+        qapi_free_InetSocketAddress(addr);
         sock = inet_connect_opts(opts, errp, callback, opaque);
-    } else {
-        error_propagate(errp, local_err);
+        qemu_opts_del(opts);
     }
-    qemu_opts_del(opts);
     return sock;
 }
 
commit 5be8c759f008947514fbf7d8df9a30eebe496b6d
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 19 14:03:35 2012 +0200

    qapi: add socket address types
    
    Acked-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/qapi-schema.json b/qapi-schema.json
index c615ee2..8468567 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2519,6 +2519,59 @@
     'opts': 'NetClientOptions' } }
 
 ##
+# @InetSocketAddress
+#
+# Captures a socket address or address range in the Internet namespace.
+#
+# @host: host part of the address
+#
+# @port: port part of the address, or lowest port if @to is present
+#
+# @to: highest port to try
+#
+# @ipv4: whether to accept IPv4 addresses, default try both IPv4 and IPv6
+#        #optional
+#
+# @ipv6: whether to accept IPv6 addresses, default try both IPv4 and IPv6
+#        #optional
+#
+# Since 1.3
+##
+{ 'type': 'InetSocketAddress',
+  'data': {
+    'host': 'str',
+    'port': 'str',
+    '*to': 'uint16',
+    '*ipv4': 'bool',
+    '*ipv6': 'bool' } }
+
+##
+# @UnixSocketAddress
+#
+# Captures a socket address in the local ("Unix socket") namespace.
+#
+# @path: filesystem path to use
+#
+# Since 1.3
+##
+{ 'type': 'UnixSocketAddress',
+  'data': {
+    'path': 'str' } }
+
+##
+# @SocketAddress
+#
+# Captures the address of a socket, which could also be a named file descriptor
+#
+# Since 1.3
+##
+{ 'union': 'SocketAddress',
+  'data': {
+    'inet': 'InetSocketAddress',
+    'unix': 'UnixSocketAddress',
+    'fd': 'String' } }
+
+##
 # @getfd:
 #
 # Receive a file descriptor via SCM rights and assign it a name
commit 69758c22e0814f77c7a286ce65a68270ce71284e
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 19 15:11:05 2012 +0200

    build: add QAPI files to the tools
    
    We need them because qemu-sockets will soon be using SocketAddress.
    
    Acked-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/Makefile b/Makefile
index 88285a4..9f599bc 100644
--- a/Makefile
+++ b/Makefile
@@ -164,8 +164,7 @@ tools-obj-y = $(oslib-obj-y) $(trace-obj-y) qemu-tool.o qemu-timer.o \
 	iohandler.o cutils.o iov.o async.o
 tools-obj-$(CONFIG_POSIX) += compatfd.o
 
-qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y) $(qapi-obj-y) \
-                              qapi-visit.o qapi-types.o
+qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y)
 qemu-nbd$(EXESUF): qemu-nbd.o $(tools-obj-y) $(block-obj-y)
 qemu-io$(EXESUF): qemu-io.o cmd.o $(tools-obj-y) $(block-obj-y)
 
diff --git a/Makefile.objs b/Makefile.objs
index 74b3542..3f16d67 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -48,6 +48,7 @@ block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
 block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
 block-obj-y += block/
+block-obj-y += $(qapi-obj-y) qapi-types.o qapi-visit.o
 
 ifeq ($(CONFIG_VIRTIO)$(CONFIG_VIRTFS)$(CONFIG_PCI),yyy)
 # Lots of the fsdev/9pcode is pulled in by vl.c via qemu_fsdev_add.
@@ -239,9 +240,9 @@ QEMU_CFLAGS+=$(GLIB_CFLAGS)
 
 nested-vars += \
 	qga-obj-y \
-	block-obj-y \
 	qom-obj-y \
 	qapi-obj-y \
+	block-obj-y \
 	user-obj-y \
 	common-obj-y \
 	extra-obj-y
commit 3f8b11bc7d86a1952311565f089063d544d5a2f7
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Fri Oct 19 11:34:18 2012 +0200

    vnc: drop QERR_VNC_SERVER_FAILED
    
    We now always return "nice" error messages in errp when we goto fail.
    Drop the default error message.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/qerror.h b/qerror.h
index c91708c..f23b978 100644
--- a/qerror.h
+++ b/qerror.h
@@ -237,9 +237,6 @@ void assert_no_error(Error *err);
 #define QERR_VIRTFS_FEATURE_BLOCKS_MIGRATION \
     ERROR_CLASS_GENERIC_ERROR, "Migration is disabled when VirtFS export path '%s' is mounted in the guest using mount_tag '%s'"
 
-#define QERR_VNC_SERVER_FAILED \
-    ERROR_CLASS_GENERIC_ERROR, "Could not start VNC server on %s"
-
 #define QERR_SOCKET_CONNECT_FAILED \
     ERROR_CLASS_GENERIC_ERROR, "Failed to connect to socket"
 
diff --git a/ui/vnc.c b/ui/vnc.c
index 9377569..d0ffcc5 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -3088,9 +3088,6 @@ void vnc_display_open(DisplayState *ds, const char *display, Error **errp)
     return;
 
 fail:
-    if (!error_is_set(errp)) {
-        error_set(errp, QERR_VNC_SERVER_FAILED, display);
-    }
     g_free(vs->display);
     vs->display = NULL;
 }
commit 58899664de29c250229f8d306fcf04f852cf5cc6
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 19 13:54:39 2012 +0200

    qemu-sockets: add error propagation to Unix socket functions
    
    Before:
    
        $ qemu-system-x86_64 -monitor unix:/vvv,server=off
        connect(unix:/vvv): No such file or directory
        chardev: opening backend "socket" failed
    
    After:
    
        $ x86_64-softmmu/qemu-system-x86_64 -monitor unix:/vvv,server=off
        qemu-system-x86_64: -monitor unix:/vvv,server=off: Failed to connect to socket: No such file or directory
        chardev: opening backend "socket" failed
    
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/qemu-sockets.c b/qemu-sockets.c
index fb9c4c9..daff8e6 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -632,7 +632,7 @@ int unix_listen_opts(QemuOpts *opts, Error **errp)
 
     sock = qemu_socket(PF_UNIX, SOCK_STREAM, 0);
     if (sock < 0) {
-        perror("socket(unix)");
+        error_set_errno(errp, errno, QERR_SOCKET_CREATE_FAILED);
         return -1;
     }
 
@@ -657,11 +657,11 @@ int unix_listen_opts(QemuOpts *opts, Error **errp)
 
     unlink(un.sun_path);
     if (bind(sock, (struct sockaddr*) &un, sizeof(un)) < 0) {
-        fprintf(stderr, "bind(unix:%s): %s\n", un.sun_path, strerror(errno));
+        error_set_errno(errp, errno, QERR_SOCKET_BIND_FAILED);
         goto err;
     }
     if (listen(sock, 1) < 0) {
-        fprintf(stderr, "listen(unix:%s): %s\n", un.sun_path, strerror(errno));
+        error_set_errno(errp, errno, QERR_SOCKET_LISTEN_FAILED);
         goto err;
     }
 
@@ -681,13 +681,13 @@ int unix_connect_opts(QemuOpts *opts, Error **errp,
     int sock, rc;
 
     if (NULL == path) {
-        fprintf(stderr, "unix connect: no path specified\n");
+        error_setg(errp, "unix connect: no path specified\n");
         return -1;
     }
 
     sock = qemu_socket(PF_UNIX, SOCK_STREAM, 0);
     if (sock < 0) {
-        perror("socket(unix)");
+        error_set_errno(errp, errno, QERR_SOCKET_CREATE_FAILED);
         return -1;
     }
     if (callback != NULL) {
@@ -722,7 +722,7 @@ int unix_connect_opts(QemuOpts *opts, Error **errp,
     }
 
     if (rc < 0) {
-        fprintf(stderr, "connect(unix:%s): %s\n", path, strerror(errno));
+        error_set_errno(errp, -rc, QERR_SOCKET_CONNECT_FAILED);
         close(sock);
         sock = -1;
     }
@@ -735,7 +735,7 @@ int unix_connect_opts(QemuOpts *opts, Error **errp,
 
 int unix_listen_opts(QemuOpts *opts, Error **errp)
 {
-    fprintf(stderr, "unix sockets are not available on windows\n");
+    error_setg(errp, "unix sockets are not available on windows");
     errno = ENOTSUP;
     return -1;
 }
@@ -743,7 +743,7 @@ int unix_listen_opts(QemuOpts *opts, Error **errp)
 int unix_connect_opts(QemuOpts *opts, Error **errp,
                       NonBlockingConnectHandler *callback, void *opaque)
 {
-    fprintf(stderr, "unix sockets are not available on windows\n");
+    error_setg(errp, "unix sockets are not available on windows");
     errno = ENOTSUP;
     return -1;
 }
commit 2f002c43ebded28604c26787c9215c4d3594f0ec
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Sep 19 13:22:21 2012 +0200

    qemu-sockets: add error propagation to inet_parse
    
    Reviewed-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/qemu-sockets.c b/qemu-sockets.c
index 078f7c1..fb9c4c9 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -480,7 +480,7 @@ err:
 }
 
 /* compatibility wrapper */
-static int inet_parse(QemuOpts *opts, const char *str)
+static void inet_parse(QemuOpts *opts, const char *str, Error **errp)
 {
     const char *optstr, *h;
     char addr[64];
@@ -492,32 +492,28 @@ static int inet_parse(QemuOpts *opts, const char *str)
         /* no host given */
         addr[0] = '\0';
         if (1 != sscanf(str,":%32[^,]%n",port,&pos)) {
-            fprintf(stderr, "%s: portonly parse error (%s)\n",
-                    __FUNCTION__, str);
-            return -1;
+            error_setg(errp, "error parsing port in address '%s'", str);
+            return;
         }
     } else if (str[0] == '[') {
         /* IPv6 addr */
         if (2 != sscanf(str,"[%64[^]]]:%32[^,]%n",addr,port,&pos)) {
-            fprintf(stderr, "%s: ipv6 parse error (%s)\n",
-                    __FUNCTION__, str);
-            return -1;
+            error_setg(errp, "error parsing IPv6 address '%s'", str);
+            return;
         }
         qemu_opt_set(opts, "ipv6", "on");
     } else if (qemu_isdigit(str[0])) {
         /* IPv4 addr */
         if (2 != sscanf(str,"%64[0-9.]:%32[^,]%n",addr,port,&pos)) {
-            fprintf(stderr, "%s: ipv4 parse error (%s)\n",
-                    __FUNCTION__, str);
-            return -1;
+            error_setg(errp, "error parsing IPv4 address '%s'", str);
+            return;
         }
         qemu_opt_set(opts, "ipv4", "on");
     } else {
         /* hostname */
         if (2 != sscanf(str,"%64[^:]:%32[^,]%n",addr,port,&pos)) {
-            fprintf(stderr, "%s: hostname parse error (%s)\n",
-                    __FUNCTION__, str);
-            return -1;
+            error_setg(errp, "error parsing address '%s'", str);
+            return;
         }
     }
     qemu_opt_set(opts, "host", addr);
@@ -532,7 +528,6 @@ static int inet_parse(QemuOpts *opts, const char *str)
         qemu_opt_set(opts, "ipv4", "on");
     if (strstr(optstr, ",ipv6"))
         qemu_opt_set(opts, "ipv6", "on");
-    return 0;
 }
 
 int inet_listen(const char *str, char *ostr, int olen,
@@ -541,9 +536,11 @@ int inet_listen(const char *str, char *ostr, int olen,
     QemuOpts *opts;
     char *optstr;
     int sock = -1;
+    Error *local_err = NULL;
 
     opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
-    if (inet_parse(opts, str) == 0) {
+    inet_parse(opts, str, &local_err);
+    if (local_err == NULL) {
         sock = inet_listen_opts(opts, port_offset, errp);
         if (sock != -1 && ostr) {
             optstr = strchr(str, ',');
@@ -560,7 +557,7 @@ int inet_listen(const char *str, char *ostr, int olen,
             }
         }
     } else {
-        error_set(errp, QERR_SOCKET_CREATE_FAILED);
+        error_propagate(errp, local_err);
     }
     qemu_opts_del(opts);
     return sock;
@@ -578,12 +575,14 @@ int inet_connect(const char *str, Error **errp)
 {
     QemuOpts *opts;
     int sock = -1;
+    Error *local_err = NULL;
 
     opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
-    if (inet_parse(opts, str) == 0) {
+    inet_parse(opts, str, &local_err);
+    if (local_err == NULL) {
         sock = inet_connect_opts(opts, errp, NULL, NULL);
     } else {
-        error_set(errp, QERR_SOCKET_CREATE_FAILED);
+        error_propagate(errp, local_err);
     }
     qemu_opts_del(opts);
     return sock;
@@ -608,14 +607,16 @@ int inet_nonblocking_connect(const char *str,
 {
     QemuOpts *opts;
     int sock = -1;
+    Error *local_err = NULL;
 
     g_assert(callback != NULL);
 
     opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
-    if (inet_parse(opts, str) == 0) {
+    inet_parse(opts, str, &local_err);
+    if (local_err == NULL) {
         sock = inet_connect_opts(opts, errp, callback, opaque);
     } else {
-        error_set(errp, QERR_SOCKET_CREATE_FAILED);
+        error_propagate(errp, local_err);
     }
     qemu_opts_del(opts);
     return sock;
commit 4f085c822947068c785be8c1f27d1ad4e3215149
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Oct 2 09:25:14 2012 +0200

    qemu-sockets: add error propagation to inet_dgram_opts
    
    Before:
    
        $ qemu-system-x86_64 -monitor udp:localhost:631 at localhost:631
        inet_dgram_opts: bind(ipv4,127.0.0.1,631): OK
        inet_dgram_opts failed
        chardev: opening backend "udp" failed
    
    After:
    
        $ x86_64-softmmu/qemu-system-x86_64 -monitor udp:localhost:631 at localhost:631
        qemu-system-x86_64: -monitor udp:localhost:631 at localhost:631: Failed to bind socket: Address already in use
        chardev: opening backend "udp" failed
    
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/qemu-char.c b/qemu-char.c
index 04b5c23..afe2bfb 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -2105,7 +2105,6 @@ static CharDriverState *qemu_chr_open_udp(QemuOpts *opts)
 
     fd = inet_dgram_opts(opts, &local_err);
     if (fd < 0) {
-        fprintf(stderr, "inet_dgram_opts failed\n");
         goto return_err;
     }
 
diff --git a/qemu-sockets.c b/qemu-sockets.c
index 88d58fe..078f7c1 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -396,8 +396,6 @@ int inet_dgram_opts(QemuOpts *opts, Error **errp)
     struct addrinfo ai, *peer = NULL, *local = NULL;
     const char *addr;
     const char *port;
-    char uaddr[INET6_ADDRSTRLEN+1];
-    char uport[33];
     int sock = -1, rc;
 
     /* lookup peer addr */
@@ -412,7 +410,7 @@ int inet_dgram_opts(QemuOpts *opts, Error **errp)
         addr = "localhost";
     }
     if (port == NULL || strlen(port) == 0) {
-        fprintf(stderr, "inet_dgram: port not specified\n");
+        error_setg(errp, "remote port not specified");
         return -1;
     }
 
@@ -422,8 +420,8 @@ int inet_dgram_opts(QemuOpts *opts, Error **errp)
         ai.ai_family = PF_INET6;
 
     if (0 != (rc = getaddrinfo(addr, port, &ai, &peer))) {
-        fprintf(stderr,"getaddrinfo(%s,%s): %s\n", addr, port,
-                gai_strerror(rc));
+        error_setg(errp, "address resolution failed for %s:%s: %s", addr, port,
+                   gai_strerror(rc));
 	return -1;
     }
 
@@ -442,44 +440,28 @@ int inet_dgram_opts(QemuOpts *opts, Error **errp)
         port = "0";
 
     if (0 != (rc = getaddrinfo(addr, port, &ai, &local))) {
-        fprintf(stderr,"getaddrinfo(%s,%s): %s\n", addr, port,
-                gai_strerror(rc));
+        error_setg(errp, "address resolution failed for %s:%s: %s", addr, port,
+                   gai_strerror(rc));
         goto err;
     }
 
     /* create socket */
     sock = qemu_socket(peer->ai_family, peer->ai_socktype, peer->ai_protocol);
     if (sock < 0) {
-        fprintf(stderr,"%s: socket(%s): %s\n", __FUNCTION__,
-                inet_strfamily(peer->ai_family), strerror(errno));
+        error_set_errno(errp, errno, QERR_SOCKET_CREATE_FAILED);
         goto err;
     }
     setsockopt(sock,SOL_SOCKET,SO_REUSEADDR,(void*)&on,sizeof(on));
 
     /* bind socket */
-    if (getnameinfo((struct sockaddr*)local->ai_addr,local->ai_addrlen,
-                    uaddr,INET6_ADDRSTRLEN,uport,32,
-                    NI_NUMERICHOST | NI_NUMERICSERV) != 0) {
-        fprintf(stderr, "%s: getnameinfo: oops\n", __FUNCTION__);
-        goto err;
-    }
     if (bind(sock, local->ai_addr, local->ai_addrlen) < 0) {
-        fprintf(stderr,"%s: bind(%s,%s,%d): OK\n", __FUNCTION__,
-                inet_strfamily(local->ai_family), uaddr, inet_getport(local));
+        error_set_errno(errp, errno, QERR_SOCKET_BIND_FAILED);
         goto err;
     }
 
     /* connect to peer */
-    if (getnameinfo((struct sockaddr*)peer->ai_addr, peer->ai_addrlen,
-                    uaddr, INET6_ADDRSTRLEN, uport, 32,
-                    NI_NUMERICHOST | NI_NUMERICSERV) != 0) {
-        fprintf(stderr, "%s: getnameinfo: oops\n", __FUNCTION__);
-        goto err;
-    }
     if (connect(sock,peer->ai_addr,peer->ai_addrlen) < 0) {
-        fprintf(stderr, "%s: connect(%s,%s,%s,%s): %s\n", __FUNCTION__,
-                inet_strfamily(peer->ai_family),
-                peer->ai_canonname, uaddr, uport, strerror(errno));
+        error_set_errno(errp, errno, QERR_SOCKET_CONNECT_FAILED);
         goto err;
     }
 
commit 11663b553b0815b266b6a9060ab9702cf7a5334c
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Oct 2 09:19:01 2012 +0200

    qemu-sockets: add error propagation to inet_connect_addr
    
    perror and fprintf can be removed because all clients can now consume
    Errors properly.  However, we'll need to change the non-blocking connect
    handlers to take an Error, in order to improve error handling for
    migration with the TCP protocol.
    
    This is a minor degradation in error reporting for outgoing migration.
    However, until 1.2 this case just failed without even attempting to
    connect, so it is still an improvement as far as overall QoI is
    concerned.
    
    Reviewed-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/qemu-sockets.c b/qemu-sockets.c
index 9e3d233..88d58fe 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -216,7 +216,7 @@ typedef struct ConnectState {
 } ConnectState;
 
 static int inet_connect_addr(struct addrinfo *addr, bool *in_progress,
-                             ConnectState *connect_state);
+                             ConnectState *connect_state, Error **errp);
 
 static void wait_for_connect(void *opaque)
 {
@@ -246,7 +246,7 @@ static void wait_for_connect(void *opaque)
     if (s->current_addr) {
         while (s->current_addr->ai_next != NULL && s->fd < 0) {
             s->current_addr = s->current_addr->ai_next;
-            s->fd = inet_connect_addr(s->current_addr, &in_progress, s);
+            s->fd = inet_connect_addr(s->current_addr, &in_progress, s, NULL);
             /* connect in progress */
             if (in_progress) {
                 return;
@@ -263,7 +263,7 @@ static void wait_for_connect(void *opaque)
 }
 
 static int inet_connect_addr(struct addrinfo *addr, bool *in_progress,
-                             ConnectState *connect_state)
+                             ConnectState *connect_state, Error **errp)
 {
     int sock, rc;
 
@@ -271,8 +271,7 @@ static int inet_connect_addr(struct addrinfo *addr, bool *in_progress,
 
     sock = qemu_socket(addr->ai_family, addr->ai_socktype, addr->ai_protocol);
     if (sock < 0) {
-        fprintf(stderr, "%s: socket(%s): %s\n", __func__,
-                inet_strfamily(addr->ai_family), strerror(errno));
+        error_set_errno(errp, errno, QERR_SOCKET_CREATE_FAILED);
         return -1;
     }
     qemu_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
@@ -293,6 +292,7 @@ static int inet_connect_addr(struct addrinfo *addr, bool *in_progress,
                              connect_state);
         *in_progress = true;
     } else if (rc < 0) {
+        error_set_errno(errp, errno, QERR_SOCKET_CONNECT_FAILED);
         closesocket(sock);
         return -1;
     }
@@ -375,7 +375,7 @@ int inet_connect_opts(QemuOpts *opts, Error **errp,
         if (connect_state != NULL) {
             connect_state->current_addr = e;
         }
-        sock = inet_connect_addr(e, &in_progress, connect_state);
+        sock = inet_connect_addr(e, &in_progress, connect_state, errp);
         if (in_progress) {
             return sock;
         } else if (sock >= 0) {
@@ -386,9 +386,6 @@ int inet_connect_opts(QemuOpts *opts, Error **errp,
             break;
         }
     }
-    if (sock < 0) {
-        error_set(errp, QERR_SOCKET_CONNECT_FAILED);
-    }
     g_free(connect_state);
     freeaddrinfo(res);
     return sock;
commit a12fb8ad5b444174ecf3c3270f93ec59ad177bf3
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Oct 2 09:18:02 2012 +0200

    qemu-sockets: include strerror or gai_strerror output in error messages
    
    Among others, before:
    
        $ qemu-system-x86_64 -chardev socket,port=12345,id=char
        inet_connect: host and/or port not specified
        chardev: opening backend "socket" failed
    
    After:
    
        $ x86_64-softmmu/qemu-system-x86_64 -chardev socket,port=12345,id=char
        qemu-system-x86_64: -chardev socket,port=12345,id=char: host and/or port not specified
        chardev: opening backend "socket" failed
    
    perror and fprintf can be removed because all clients can now
    consume Errors properly.
    
    Reviewed-by: Paolo Bonzini <pbonzini at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/qemu-sockets.c b/qemu-sockets.c
index d510ad1..9e3d233 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -120,8 +120,7 @@ int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp)
 
     if ((qemu_opt_get(opts, "host") == NULL) ||
         (qemu_opt_get(opts, "port") == NULL)) {
-        fprintf(stderr, "%s: host and/or port not specified\n", __FUNCTION__);
-        error_set(errp, QERR_SOCKET_CREATE_FAILED);
+        error_setg(errp, "host and/or port not specified");
         return -1;
     }
     pstrcpy(port, sizeof(port), qemu_opt_get(opts, "port"));
@@ -138,9 +137,8 @@ int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp)
         snprintf(port, sizeof(port), "%d", atoi(port) + port_offset);
     rc = getaddrinfo(strlen(addr) ? addr : NULL, port, &ai, &res);
     if (rc != 0) {
-        fprintf(stderr,"getaddrinfo(%s,%s): %s\n", addr, port,
-                gai_strerror(rc));
-        error_set(errp, QERR_SOCKET_CREATE_FAILED);
+        error_setg(errp, "address resolution failed for %s:%s: %s", addr, port,
+                   gai_strerror(rc));
         return -1;
     }
 
@@ -151,10 +149,8 @@ int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp)
 		        NI_NUMERICHOST | NI_NUMERICSERV);
         slisten = qemu_socket(e->ai_family, e->ai_socktype, e->ai_protocol);
         if (slisten < 0) {
-            fprintf(stderr,"%s: socket(%s): %s\n", __FUNCTION__,
-                    inet_strfamily(e->ai_family), strerror(errno));
             if (!e->ai_next) {
-                error_set(errp, QERR_SOCKET_CREATE_FAILED);
+                error_set_errno(errp, errno, QERR_SOCKET_CREATE_FAILED);
             }
             continue;
         }
@@ -176,24 +172,19 @@ int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp)
                 goto listen;
             }
             if (p == port_max) {
-                fprintf(stderr,"%s: bind(%s,%s,%d): %s\n", __FUNCTION__,
-                        inet_strfamily(e->ai_family), uaddr, inet_getport(e),
-                        strerror(errno));
                 if (!e->ai_next) {
-                    error_set(errp, QERR_SOCKET_BIND_FAILED);
+                    error_set_errno(errp, errno, QERR_SOCKET_BIND_FAILED);
                 }
             }
         }
         closesocket(slisten);
     }
-    fprintf(stderr, "%s: FAILED\n", __FUNCTION__);
     freeaddrinfo(res);
     return -1;
 
 listen:
     if (listen(slisten,1) != 0) {
-        error_set(errp, QERR_SOCKET_LISTEN_FAILED);
-        perror("listen");
+        error_set_errno(errp, errno, QERR_SOCKET_LISTEN_FAILED);
         closesocket(slisten);
         freeaddrinfo(res);
         return -1;
@@ -324,9 +315,7 @@ static struct addrinfo *inet_parse_connect_opts(QemuOpts *opts, Error **errp)
     addr = qemu_opt_get(opts, "host");
     port = qemu_opt_get(opts, "port");
     if (addr == NULL || port == NULL) {
-        fprintf(stderr,
-                "inet_parse_connect_opts: host and/or port not specified\n");
-        error_set(errp, QERR_SOCKET_CREATE_FAILED);
+        error_setg(errp, "host and/or port not specified");
         return NULL;
     }
 
@@ -340,9 +329,8 @@ static struct addrinfo *inet_parse_connect_opts(QemuOpts *opts, Error **errp)
     /* lookup */
     rc = getaddrinfo(addr, port, &ai, &res);
     if (rc != 0) {
-        fprintf(stderr, "getaddrinfo(%s,%s): %s\n", addr, port,
-                gai_strerror(rc));
-        error_set(errp, QERR_SOCKET_CREATE_FAILED);
+        error_setg(errp, "address resolution failed for %s:%s: %s", addr, port,
+                   gai_strerror(rc));
         return NULL;
     }
     return res;
commit 2d55f0e817b6fa260282f5b5c533bcd470c75a32
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Oct 2 10:17:21 2012 +0200

    vnc: add error propagation to vnc_display_open
    
    Before:
    
        $ qemu-system-x86_64 -vnc foo.bar:12345
        getaddrinfo(foo.bar,18245): Name or service not known
        Failed to start VNC server on `foo.bar:12345'
    
        $ qemu-system-x86_64 -vnc localhost:12345,reverse=on
        inet_connect_opts: connect(ipv4,yakj.usersys.redhat.com,127.0.0.1,12345): Connection refused
        Failed to start VNC server on `localhost:12345,reverse=on'
    
    After:
    
        $ x86_64-softmmu/qemu-system-x86_64 -vnc foo.bar:12345
        Failed to start VNC server on `foo.bar:12345': address resolution failed for foo.bar:18245: Name or service not known
    
        $ x86_64-softmmu/qemu-system-x86_64 -vnc localhost:12345,reverse=on
        Failed to start VNC server on `localhost:12345,reverse=on': Failed to connect to socket: Connection refused
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/ui/vnc.c b/ui/vnc.c
index 46de820..9377569 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -2850,7 +2850,7 @@ char *vnc_display_local_addr(DisplayState *ds)
     return vnc_socket_local_addr("%s:%s", vs->lsock);
 }
 
-int vnc_display_open(DisplayState *ds, const char *display)
+void vnc_display_open(DisplayState *ds, const char *display, Error **errp)
 {
     VncDisplay *vs = ds ? (VncDisplay *)ds->opaque : vnc_display;
     const char *options;
@@ -2868,11 +2868,13 @@ int vnc_display_open(DisplayState *ds, const char *display)
 #endif
     int lock_key_sync = 1;
 
-    if (!vnc_display)
-        return -1;
+    if (!vnc_display) {
+        error_setg(errp, "VNC display not active");
+        return;
+    }
     vnc_display_close(ds);
     if (strcmp(display, "none") == 0)
-        return 0;
+        return;
 
     vs->display = g_strdup(display);
     vs->share_policy = VNC_SHARE_POLICY_ALLOW_EXCLUSIVE;
@@ -2882,10 +2884,10 @@ int vnc_display_open(DisplayState *ds, const char *display)
         options++;
         if (strncmp(options, "password", 8) == 0) {
             if (fips_get_state()) {
-                fprintf(stderr,
-                        "VNC password auth disabled due to FIPS mode, "
-                        "consider using the VeNCrypt or SASL authentication "
-                        "methods as an alternative\n");
+                error_setg(errp,
+                           "VNC password auth disabled due to FIPS mode, "
+                           "consider using the VeNCrypt or SASL authentication "
+                           "methods as an alternative");
                 goto fail;
             }
             password = 1; /* Require password auth */
@@ -2916,13 +2918,13 @@ int vnc_display_open(DisplayState *ds, const char *display)
 
                 VNC_DEBUG("Trying certificate path '%s'\n", path);
                 if (vnc_tls_set_x509_creds_dir(vs, path) < 0) {
-                    fprintf(stderr, "Failed to find x509 certificates/keys in %s\n", path);
+                    error_setg(errp, "Failed to find x509 certificates/keys in %s", path);
                     g_free(path);
                     goto fail;
                 }
                 g_free(path);
             } else {
-                fprintf(stderr, "No certificate path provided\n");
+                error_setg(errp, "No certificate path provided");
                 goto fail;
             }
 #endif
@@ -2942,7 +2944,7 @@ int vnc_display_open(DisplayState *ds, const char *display)
             } else if (strncmp(options+6, "force-shared", 12) == 0) {
                 vs->share_policy = VNC_SHARE_POLICY_FORCE_SHARED;
             } else {
-                fprintf(stderr, "unknown vnc share= option\n");
+                error_setg(errp, "unknown vnc share= option");
                 goto fail;
             }
         }
@@ -3044,8 +3046,8 @@ int vnc_display_open(DisplayState *ds, const char *display)
 
 #ifdef CONFIG_VNC_SASL
     if ((saslErr = sasl_server_init(NULL, "qemu")) != SASL_OK) {
-        fprintf(stderr, "Failed to initialize SASL auth %s",
-                sasl_errstring(saslErr, NULL, NULL));
+        error_setg(errp, "Failed to initialize SASL auth: %s",
+                   sasl_errstring(saslErr, NULL, NULL));
         goto fail;
     }
 #endif
@@ -3056,9 +3058,9 @@ int vnc_display_open(DisplayState *ds, const char *display)
         int csock;
         vs->lsock = -1;
         if (strncmp(display, "unix:", 5) == 0) {
-            csock = unix_connect(display+5, NULL);
+            csock = unix_connect(display+5, errp);
         } else {
-            csock = inet_connect(display, NULL);
+            csock = inet_connect(display, errp);
         }
         if (csock < 0) {
             goto fail;
@@ -3070,10 +3072,10 @@ int vnc_display_open(DisplayState *ds, const char *display)
         dpy = g_malloc(256);
         if (strncmp(display, "unix:", 5) == 0) {
             pstrcpy(dpy, 256, "unix:");
-            vs->lsock = unix_listen(display+5, dpy+5, 256-5, NULL);
+            vs->lsock = unix_listen(display+5, dpy+5, 256-5, errp);
         } else {
             vs->lsock = inet_listen(display, dpy, 256,
-                                    SOCK_STREAM, 5900, NULL);
+                                    SOCK_STREAM, 5900, errp);
         }
         if (vs->lsock < 0) {
             g_free(dpy);
@@ -3083,12 +3085,14 @@ int vnc_display_open(DisplayState *ds, const char *display)
         vs->display = dpy;
         qemu_set_fd_handler2(vs->lsock, NULL, vnc_listen_read, NULL, vs);
     }
-    return 0;
+    return;
 
 fail:
+    if (!error_is_set(errp)) {
+        error_set(errp, QERR_VNC_SERVER_FAILED, display);
+    }
     g_free(vs->display);
     vs->display = NULL;
-    return -1;
 }
 
 void vnc_display_add_client(DisplayState *ds, int csock, int skipauth)
diff --git a/vl.c b/vl.c
index 0597888..9f99ef4 100644
--- a/vl.c
+++ b/vl.c
@@ -3711,10 +3711,13 @@ int main(int argc, char **argv, char **envp)
 #ifdef CONFIG_VNC
     /* init remote displays */
     if (vnc_display) {
+        Error *local_err = NULL;
         vnc_display_init(ds);
-        if (vnc_display_open(ds, vnc_display) < 0) {
-            fprintf(stderr, "Failed to start VNC server on `%s'\n",
-                    vnc_display);
+        vnc_display_open(ds, vnc_display, &local_err);
+        if (local_err != NULL) {
+            fprintf(stderr, "Failed to start VNC server on `%s': %s\n",
+                    vnc_display, error_get_pretty(local_err));
+            error_free(local_err);
             exit(1);
         }
 
commit 007fcd3ee9673b3d00baacf3765bd501296155cd
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Oct 18 09:01:01 2012 +0200

    vnc: reorganize code for reverse mode
    
    Avoid the dance between csock and vs->lsock.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/console.h b/console.h
index f990684..6099d8d 100644
--- a/console.h
+++ b/console.h
@@ -378,7 +378,7 @@ void cocoa_display_init(DisplayState *ds, int full_screen);
 /* vnc.c */
 void vnc_display_init(DisplayState *ds);
 void vnc_display_close(DisplayState *ds);
-int vnc_display_open(DisplayState *ds, const char *display);
+void vnc_display_open(DisplayState *ds, const char *display, Error **errp);
 void vnc_display_add_client(DisplayState *ds, int csock, int skipauth);
 int vnc_display_disable_login(DisplayState *ds);
 char *vnc_display_local_addr(DisplayState *ds);
diff --git a/qmp.c b/qmp.c
index 36c54c5..31bc3bf 100644
--- a/qmp.c
+++ b/qmp.c
@@ -349,11 +349,9 @@ void qmp_change_vnc_password(const char *password, Error **errp)
     }
 }
 
-static void qmp_change_vnc_listen(const char *target, Error **err)
+static void qmp_change_vnc_listen(const char *target, Error **errp)
 {
-    if (vnc_display_open(NULL, target) < 0) {
-        error_set(err, QERR_VNC_SERVER_FAILED, target);
-    }
+    vnc_display_open(NULL, target, errp);
 }
 
 static void qmp_change_vnc(const char *target, bool has_arg, const char *arg,
diff --git a/ui/vnc.c b/ui/vnc.c
index 72d6f68..46de820 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -3053,19 +3053,17 @@ int vnc_display_open(DisplayState *ds, const char *display)
 
     if (reverse) {
         /* connect to viewer */
-        if (strncmp(display, "unix:", 5) == 0)
-            vs->lsock = unix_connect(display+5, NULL);
-        else
-            vs->lsock = inet_connect(display, NULL);
-        if (vs->lsock < 0) {
-            goto fail;
+        int csock;
+        vs->lsock = -1;
+        if (strncmp(display, "unix:", 5) == 0) {
+            csock = unix_connect(display+5, NULL);
         } else {
-            int csock = vs->lsock;
-            vs->lsock = -1;
-            vnc_connect(vs, csock, 0);
+            csock = inet_connect(display, NULL);
         }
-        return 0;
-
+        if (csock < 0) {
+            goto fail;
+        }
+        vnc_connect(vs, csock, 0);
     } else {
         /* listen for connects */
         char *dpy;
commit 1ce52c78ab90c4303bcb110f2c614410386d79a2
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Oct 18 09:07:05 2012 +0200

    vnc: introduce a single label for error returns
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/ui/vnc.c b/ui/vnc.c
index 64c4092..72d6f68 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -2874,8 +2874,7 @@ int vnc_display_open(DisplayState *ds, const char *display)
     if (strcmp(display, "none") == 0)
         return 0;
 
-    if (!(vs->display = strdup(display)))
-        return -1;
+    vs->display = g_strdup(display);
     vs->share_policy = VNC_SHARE_POLICY_ALLOW_EXCLUSIVE;
 
     options = display;
@@ -2887,9 +2886,7 @@ int vnc_display_open(DisplayState *ds, const char *display)
                         "VNC password auth disabled due to FIPS mode, "
                         "consider using the VeNCrypt or SASL authentication "
                         "methods as an alternative\n");
-                g_free(vs->display);
-                vs->display = NULL;
-                return -1;
+                goto fail;
             }
             password = 1; /* Require password auth */
         } else if (strncmp(options, "reverse", 7) == 0) {
@@ -2921,16 +2918,12 @@ int vnc_display_open(DisplayState *ds, const char *display)
                 if (vnc_tls_set_x509_creds_dir(vs, path) < 0) {
                     fprintf(stderr, "Failed to find x509 certificates/keys in %s\n", path);
                     g_free(path);
-                    g_free(vs->display);
-                    vs->display = NULL;
-                    return -1;
+                    goto fail;
                 }
                 g_free(path);
             } else {
                 fprintf(stderr, "No certificate path provided\n");
-                g_free(vs->display);
-                vs->display = NULL;
-                return -1;
+                goto fail;
             }
 #endif
 #if defined(CONFIG_VNC_TLS) || defined(CONFIG_VNC_SASL)
@@ -2950,9 +2943,7 @@ int vnc_display_open(DisplayState *ds, const char *display)
                 vs->share_policy = VNC_SHARE_POLICY_FORCE_SHARED;
             } else {
                 fprintf(stderr, "unknown vnc share= option\n");
-                g_free(vs->display);
-                vs->display = NULL;
-                return -1;
+                goto fail;
             }
         }
     }
@@ -3055,9 +3046,7 @@ int vnc_display_open(DisplayState *ds, const char *display)
     if ((saslErr = sasl_server_init(NULL, "qemu")) != SASL_OK) {
         fprintf(stderr, "Failed to initialize SASL auth %s",
                 sasl_errstring(saslErr, NULL, NULL));
-        g_free(vs->display);
-        vs->display = NULL;
-        return -1;
+        goto fail;
     }
 #endif
     vs->lock_key_sync = lock_key_sync;
@@ -3069,9 +3058,7 @@ int vnc_display_open(DisplayState *ds, const char *display)
         else
             vs->lsock = inet_connect(display, NULL);
         if (vs->lsock < 0) {
-            g_free(vs->display);
-            vs->display = NULL;
-            return -1;
+            goto fail;
         } else {
             int csock = vs->lsock;
             vs->lsock = -1;
@@ -3092,13 +3079,18 @@ int vnc_display_open(DisplayState *ds, const char *display)
         }
         if (vs->lsock < 0) {
             g_free(dpy);
-            return -1;
-        } else {
-            g_free(vs->display);
-            vs->display = dpy;
+            goto fail;
         }
+        g_free(vs->display);
+        vs->display = dpy;
+        qemu_set_fd_handler2(vs->lsock, NULL, vnc_listen_read, NULL, vs);
     }
-    return qemu_set_fd_handler2(vs->lsock, NULL, vnc_listen_read, NULL, vs);
+    return 0;
+
+fail:
+    g_free(vs->display);
+    vs->display = NULL;
+    return -1;
 }
 
 void vnc_display_add_client(DisplayState *ds, int csock, int skipauth)
commit c1c1619c8b100f675bc97198218f3fcb0a740921
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Oct 18 09:06:21 2012 +0200

    vnc: avoid Yoda conditionals
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/ui/vnc.c b/ui/vnc.c
index ceade57..64c4092 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -3068,7 +3068,7 @@ int vnc_display_open(DisplayState *ds, const char *display)
             vs->lsock = unix_connect(display+5, NULL);
         else
             vs->lsock = inet_connect(display, NULL);
-        if (-1 == vs->lsock) {
+        if (vs->lsock < 0) {
             g_free(vs->display);
             vs->display = NULL;
             return -1;
@@ -3090,7 +3090,7 @@ int vnc_display_open(DisplayState *ds, const char *display)
             vs->lsock = inet_listen(display, dpy, 256,
                                     SOCK_STREAM, 5900, NULL);
         }
-        if (-1 == vs->lsock) {
+        if (vs->lsock < 0) {
             g_free(dpy);
             return -1;
         } else {
commit 90119816e36ba019650214e7efeccdac1d4a9e32
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Oct 2 10:09:14 2012 +0200

    qemu-ga: ask and print error information from qemu-sockets
    
    Reviewed-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/qga/channel-posix.c b/qga/channel-posix.c
index e22eee6..d152827 100644
--- a/qga/channel-posix.c
+++ b/qga/channel-posix.c
@@ -181,9 +181,11 @@ static gboolean ga_channel_open(GAChannel *c, const gchar *path, GAChannelMethod
         break;
     }
     case GA_CHANNEL_UNIX_LISTEN: {
-        int fd = unix_listen(path, NULL, strlen(path), NULL);
-        if (fd == -1) {
-            g_critical("error opening path: %s", strerror(errno));
+        Error *local_err = NULL;
+        int fd = unix_listen(path, NULL, strlen(path), &local_err);
+        if (local_err != NULL) {
+            g_critical("%s", error_get_pretty(local_err));
+            error_free(local_err);
             return false;
         }
         ga_channel_listen_add(c, fd, true);
commit f8430e7621b3319f6b94c735c811b2a2448cd6ea
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Oct 2 10:07:21 2012 +0200

    nbd: ask and print error information from qemu-sockets
    
    Before:
    
        $ qemu-system-x86_64 nbd:localhost:12345
        inet_connect_opts: connect(ipv4,yakj.usersys.redhat.com,127.0.0.1,12345): Connection refused
        qemu-system-x86_64: could not open disk image nbd:localhost:12345: Connection refused
    
    After:
    
        $ x86_64-softmmu/qemu-system-x86_64 nbd:localhost:12345
        qemu-system-x86_64: Failed to connect to socket: Connection refused
        qemu-system-x86_64: could not open disk image nbd:localhost:12345: Connection refused
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/nbd.c b/nbd.c
index f61a288..cec5a94 100644
--- a/nbd.c
+++ b/nbd.c
@@ -208,7 +208,14 @@ int tcp_socket_outgoing(const char *address, uint16_t port)
 
 int tcp_socket_outgoing_spec(const char *address_and_port)
 {
-    return inet_connect(address_and_port, NULL);
+    Error *local_err = NULL;
+    int fd = inet_connect(address_and_port, &local_err);
+
+    if (local_err != NULL) {
+        qerror_report_err(local_err);
+        error_free(local_err);
+    }
+    return fd;
 }
 
 int tcp_socket_incoming(const char *address, uint16_t port)
@@ -220,22 +227,38 @@ int tcp_socket_incoming(const char *address, uint16_t port)
 
 int tcp_socket_incoming_spec(const char *address_and_port)
 {
-    char *ostr  = NULL;
-    int olen = 0;
-    return inet_listen(address_and_port, ostr, olen, SOCK_STREAM, 0, NULL);
+    Error *local_err = NULL;
+    int fd = inet_listen(address_and_port, NULL, 0, SOCK_STREAM, 0, &local_err);
+
+    if (local_err != NULL) {
+        qerror_report_err(local_err);
+        error_free(local_err);
+    }
+    return fd;
 }
 
 int unix_socket_incoming(const char *path)
 {
-    char *ostr = NULL;
-    int olen = 0;
+    Error *local_err = NULL;
+    int fd = unix_listen(path, NULL, 0, &local_err);
 
-    return unix_listen(path, ostr, olen, NULL);
+    if (local_err != NULL) {
+        qerror_report_err(local_err);
+        error_free(local_err);
+    }
+    return fd;
 }
 
 int unix_socket_outgoing(const char *path)
 {
-    return unix_connect(path, NULL);
+    Error *local_err = NULL;
+    int fd = unix_connect(path, &local_err);
+
+    if (local_err != NULL) {
+        qerror_report_err(local_err);
+        error_free(local_err);
+    }
+    return fd;
 }
 
 /* Basic flow for negotiation
commit 87d5f24f3f5edc4b69d071d5966cea0aec7b571d
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Oct 2 09:16:49 2012 +0200

    qemu-char: ask and print error information from qemu-sockets
    
    Before:
    
        $ qemu-system-x86_64 -monitor tcp:localhost:6000
        (starts despite error)
    
        $ qemu-system-x86_64 -monitor tcp:foo.bar:12345
        getaddrinfo(foo.bar,12345): Name or service not known
        chardev: opening backend "socket" failed
    
        $ qemu-system-x86_64 -monitor tcp:localhost:443,server=on
        inet_listen_opts: bind(ipv4,127.0.0.1,443): Permission denied
        inet_listen_opts: FAILED
        chardev: opening backend "socket" failed
    
    After:
    
        $ x86_64-softmmu/qemu-system-x86_64 -monitor tcp:localhost:6000
        x86_64-softmmu/qemu-system-x86_64: -monitor tcp:localhost:6000: Failed to connect to socket: Connection refused
        chardev: opening backend "socket" failed
    
        $ x86_64-softmmu/qemu-system-x86_64 -monitor tcp:foo.bar:12345
        qemu-system-x86_64: -monitor tcp:foo.bar:12345: address resolution failed for foo.bar:12345: Name or service not known
        chardev: opening backend "socket" failed
    
        $ x86_64-softmmu/qemu-system-x86_64 -monitor tcp:localhost:443,server=on
        qemu-system-x86_64: -monitor tcp:localhost:443,server=on: Failed to bind socket: Permission denied
        chardev: opening backend "socket" failed
    
    Reviewed-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/qemu-char.c b/qemu-char.c
index 8ebd582..04b5c23 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -2097,12 +2097,13 @@ static CharDriverState *qemu_chr_open_udp(QemuOpts *opts)
 {
     CharDriverState *chr = NULL;
     NetCharDriver *s = NULL;
+    Error *local_err = NULL;
     int fd = -1;
 
     chr = g_malloc0(sizeof(CharDriverState));
     s = g_malloc0(sizeof(NetCharDriver));
 
-    fd = inet_dgram_opts(opts, NULL);
+    fd = inet_dgram_opts(opts, &local_err);
     if (fd < 0) {
         fprintf(stderr, "inet_dgram_opts failed\n");
         goto return_err;
@@ -2118,6 +2119,10 @@ static CharDriverState *qemu_chr_open_udp(QemuOpts *opts)
     return chr;
 
 return_err:
+    if (local_err) {
+        qerror_report_err(local_err);
+        error_free(local_err);
+    }
     g_free(chr);
     g_free(s);
     if (fd >= 0) {
@@ -2428,6 +2433,7 @@ static CharDriverState *qemu_chr_open_socket(QemuOpts *opts)
 {
     CharDriverState *chr = NULL;
     TCPCharDriver *s = NULL;
+    Error *local_err = NULL;
     int fd = -1;
     int is_listen;
     int is_waitconnect;
@@ -2448,15 +2454,15 @@ static CharDriverState *qemu_chr_open_socket(QemuOpts *opts)
 
     if (is_unix) {
         if (is_listen) {
-            fd = unix_listen_opts(opts, NULL);
+            fd = unix_listen_opts(opts, &local_err);
         } else {
-            fd = unix_connect_opts(opts, NULL, NULL, NULL);
+            fd = unix_connect_opts(opts, &local_err, NULL, NULL);
         }
     } else {
         if (is_listen) {
-            fd = inet_listen_opts(opts, 0, NULL);
+            fd = inet_listen_opts(opts, 0, &local_err);
         } else {
-            fd = inet_connect_opts(opts, NULL, NULL, NULL);
+            fd = inet_connect_opts(opts, &local_err, NULL, NULL);
         }
     }
     if (fd < 0) {
@@ -2517,8 +2523,13 @@ static CharDriverState *qemu_chr_open_socket(QemuOpts *opts)
     return chr;
 
  fail:
-    if (fd >= 0)
+    if (local_err) {
+        qerror_report_err(local_err);
+        error_free(local_err);
+    }
+    if (fd >= 0) {
         closesocket(fd);
+    }
     g_free(s);
     g_free(chr);
     return NULL;
commit 43eaae28e0394f8fb80848fb40aa5d28c6360321
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Oct 2 18:21:18 2012 +0200

    migration (incoming): add error propagation to fd and exec protocols
    
    And remove the superfluous integer return value.
    
    Reviewed-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/migration-exec.c b/migration-exec.c
index 5f3f4b2..519af57 100644
--- a/migration-exec.c
+++ b/migration-exec.c
@@ -92,19 +92,17 @@ static void exec_accept_incoming_migration(void *opaque)
     qemu_fclose(f);
 }
 
-int exec_start_incoming_migration(const char *command)
+void exec_start_incoming_migration(const char *command, Error **errp)
 {
     QEMUFile *f;
 
     DPRINTF("Attempting to start an incoming migration\n");
     f = qemu_popen_cmd(command, "r");
     if(f == NULL) {
-        DPRINTF("Unable to apply qemu wrapper to popen file\n");
-        return -errno;
+        error_setg_errno(errp, errno, "failed to popen the migration source");
+        return;
     }
 
     qemu_set_fd_handler2(qemu_stdio_fd(f), NULL,
 			 exec_accept_incoming_migration, NULL, f);
-
-    return 0;
 }
diff --git a/migration-fd.c b/migration-fd.c
index a7c800a..ce6932d 100644
--- a/migration-fd.c
+++ b/migration-fd.c
@@ -97,7 +97,7 @@ static void fd_accept_incoming_migration(void *opaque)
     qemu_fclose(f);
 }
 
-int fd_start_incoming_migration(const char *infd)
+void fd_start_incoming_migration(const char *infd, Error **errp)
 {
     int fd;
     QEMUFile *f;
@@ -107,11 +107,9 @@ int fd_start_incoming_migration(const char *infd)
     fd = strtol(infd, NULL, 0);
     f = qemu_fdopen(fd, "rb");
     if(f == NULL) {
-        DPRINTF("Unable to apply qemu wrapper to file descriptor\n");
-        return -errno;
+        error_setg_errno(errp, errno, "failed to open the source descriptor");
+        return;
     }
 
     qemu_set_fd_handler2(fd, NULL, fd_accept_incoming_migration, NULL, f);
-
-    return 0;
 }
diff --git a/migration-tcp.c b/migration-tcp.c
index 5e54e68..46f6ac5 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -111,18 +111,15 @@ out2:
     close(s);
 }
 
-int tcp_start_incoming_migration(const char *host_port, Error **errp)
+void tcp_start_incoming_migration(const char *host_port, Error **errp)
 {
     int s;
 
     s = inet_listen(host_port, NULL, 256, SOCK_STREAM, 0, errp);
-
     if (s < 0) {
-        return -1;
+        return;
     }
 
     qemu_set_fd_handler2(s, NULL, tcp_accept_incoming_migration, NULL,
                          (void *)(intptr_t)s);
-
-    return 0;
 }
diff --git a/migration-unix.c b/migration-unix.c
index 34a413a..ed3db3a 100644
--- a/migration-unix.c
+++ b/migration-unix.c
@@ -111,16 +111,15 @@ out2:
     close(s);
 }
 
-int unix_start_incoming_migration(const char *path, Error **errp)
+void unix_start_incoming_migration(const char *path, Error **errp)
 {
     int s;
 
     s = unix_listen(path, NULL, 0, errp);
     if (s < 0) {
-        return -1;
+        return;
     }
 
     qemu_set_fd_handler2(s, NULL, unix_accept_incoming_migration, NULL,
                          (void *)(intptr_t)s);
-    return 0;
 }
diff --git a/migration.c b/migration.c
index ef171d2..bd55a15 100644
--- a/migration.c
+++ b/migration.c
@@ -64,26 +64,23 @@ MigrationState *migrate_get_current(void)
     return &current_migration;
 }
 
-int qemu_start_incoming_migration(const char *uri, Error **errp)
+void qemu_start_incoming_migration(const char *uri, Error **errp)
 {
     const char *p;
-    int ret;
 
     if (strstart(uri, "tcp:", &p))
-        ret = tcp_start_incoming_migration(p, errp);
+        tcp_start_incoming_migration(p, errp);
 #if !defined(WIN32)
     else if (strstart(uri, "exec:", &p))
-        ret =  exec_start_incoming_migration(p);
+        exec_start_incoming_migration(p, errp);
     else if (strstart(uri, "unix:", &p))
-        ret = unix_start_incoming_migration(p, errp);
+        unix_start_incoming_migration(p, errp);
     else if (strstart(uri, "fd:", &p))
-        ret = fd_start_incoming_migration(p);
+        fd_start_incoming_migration(p, errp);
 #endif
     else {
-        fprintf(stderr, "unknown migration protocol: %s\n", uri);
-        ret = -EPROTONOSUPPORT;
+        error_setg(errp, "unknown migration protocol: %s\n", uri);
     }
-    return ret;
 }
 
 void process_incoming_migration(QEMUFile *f)
diff --git a/migration.h b/migration.h
index c805e28..c3a23cc 100644
--- a/migration.h
+++ b/migration.h
@@ -49,7 +49,7 @@ struct MigrationState
 
 void process_incoming_migration(QEMUFile *f);
 
-int qemu_start_incoming_migration(const char *uri, Error **errp);
+void qemu_start_incoming_migration(const char *uri, Error **errp);
 
 uint64_t migrate_max_downtime(void);
 
@@ -57,19 +57,19 @@ void do_info_migrate_print(Monitor *mon, const QObject *data);
 
 void do_info_migrate(Monitor *mon, QObject **ret_data);
 
-int exec_start_incoming_migration(const char *host_port);
+void exec_start_incoming_migration(const char *host_port, Error **errp);
 
 void exec_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp);
 
-int tcp_start_incoming_migration(const char *host_port, Error **errp);
+void tcp_start_incoming_migration(const char *host_port, Error **errp);
 
 void tcp_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp);
 
-int unix_start_incoming_migration(const char *path, Error **errp);
+void unix_start_incoming_migration(const char *path, Error **errp);
 
 void unix_start_outgoing_migration(MigrationState *s, const char *path, Error **errp);
 
-int fd_start_incoming_migration(const char *path);
+void fd_start_incoming_migration(const char *path, Error **errp);
 
 void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp);
 
diff --git a/vl.c b/vl.c
index ee3c43a..0597888 100644
--- a/vl.c
+++ b/vl.c
@@ -3766,16 +3766,12 @@ int main(int argc, char **argv, char **envp)
     }
 
     if (incoming) {
-        Error *errp = NULL;
-        int ret = qemu_start_incoming_migration(incoming, &errp);
-        if (ret < 0) {
-            if (error_is_set(&errp)) {
-                fprintf(stderr, "Migrate: %s\n", error_get_pretty(errp));
-                error_free(errp);
-            }
-            fprintf(stderr, "Migration failed. Exit code %s(%d), exiting.\n",
-                    incoming, ret);
-            exit(ret);
+        Error *local_err = NULL;
+        qemu_start_incoming_migration(incoming, &local_err);
+        if (local_err) {
+            fprintf(stderr, "-incoming %s: %s\n", incoming, error_get_pretty(local_err));
+            error_free(local_err);
+            exit(1);
         }
     } else if (autostart) {
         vm_start();
commit f37afb5ab1921f42043b5527a517eef95c36acf8
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Oct 2 10:02:46 2012 +0200

    migration (outgoing): add error propagation for all protocols
    
    Error propagation is already there for socket backends.  Add it to other
    protocols, simplifying code that tests for errors that will never happen.
    With all protocols understanding Error, the code can be simplified
    further by removing the return value.
    
    Unfortunately, the quality of error messages varies depending
    on where the error is detected, because no Error is passed to the
    NonBlockingConnectHandler.  Thus, the exact error message still cannot
    be sent to the user if the OS reports it asynchronously via SO_ERROR.
    If NonBlockingConnectHandler received an Error**, we could for
    example report the error class and/or message via a new field of the
    query-migration command even if it is reported asynchronously.
    
    Before:
    
        (qemu) migrate fd:ffff
        migrate: An undefined error has occurred
        (qemu) info migrate
        (qemu)
    
    After:
    
        (qemu) migrate fd:ffff
        migrate: File descriptor named 'ffff' has not been found
        (qemu) info migrate
        capabilities: xbzrle: off
        Migration status: failed
        total time: 0 milliseconds
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/migration-exec.c b/migration-exec.c
index 6c97db9..5f3f4b2 100644
--- a/migration-exec.c
+++ b/migration-exec.c
@@ -60,22 +60,18 @@ static int exec_close(MigrationState *s)
     return ret;
 }
 
-int exec_start_outgoing_migration(MigrationState *s, const char *command)
+void exec_start_outgoing_migration(MigrationState *s, const char *command, Error **errp)
 {
     FILE *f;
 
     f = popen(command, "w");
     if (f == NULL) {
-        DPRINTF("Unable to popen exec target\n");
-        goto err_after_popen;
+        error_setg_errno(errp, errno, "failed to popen the migration target");
+        return;
     }
 
     s->fd = fileno(f);
-    if (s->fd == -1) {
-        DPRINTF("Unable to retrieve file descriptor for popen'd handle\n");
-        goto err_after_open;
-    }
-
+    assert(s->fd != -1);
     socket_set_nonblock(s->fd);
 
     s->opaque = qemu_popen(f, "w");
@@ -85,12 +81,6 @@ int exec_start_outgoing_migration(MigrationState *s, const char *command)
     s->write = file_write;
 
     migrate_fd_connect(s);
-    return 0;
-
-err_after_open:
-    pclose(f);
-err_after_popen:
-    return -1;
 }
 
 static void exec_accept_incoming_migration(void *opaque)
diff --git a/migration-fd.c b/migration-fd.c
index 7335167..a7c800a 100644
--- a/migration-fd.c
+++ b/migration-fd.c
@@ -73,30 +73,19 @@ static int fd_close(MigrationState *s)
     return 0;
 }
 
-int fd_start_outgoing_migration(MigrationState *s, const char *fdname)
+void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp)
 {
-    s->fd = monitor_get_fd(cur_mon, fdname, NULL);
+    s->fd = monitor_get_fd(cur_mon, fdname, errp);
     if (s->fd == -1) {
-        DPRINTF("fd_migration: invalid file descriptor identifier\n");
-        goto err_after_get_fd;
-    }
-
-    if (fcntl(s->fd, F_SETFL, O_NONBLOCK) == -1) {
-        DPRINTF("Unable to set nonblocking mode on file descriptor\n");
-        goto err_after_open;
+        return;
     }
 
+    fcntl(s->fd, F_SETFL, O_NONBLOCK);
     s->get_error = fd_errno;
     s->write = fd_write;
     s->close = fd_close;
 
     migrate_fd_connect(s);
-    return 0;
-
-err_after_open:
-    close(s->fd);
-err_after_get_fd:
-    return -1;
 }
 
 static void fd_accept_incoming_migration(void *opaque)
diff --git a/migration-tcp.c b/migration-tcp.c
index e8bc76a..5e54e68 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -68,22 +68,13 @@ static void tcp_wait_for_connect(int fd, void *opaque)
     }
 }
 
-int tcp_start_outgoing_migration(MigrationState *s, const char *host_port,
-                                 Error **errp)
+void tcp_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp)
 {
-    Error *local_err = NULL;
-
     s->get_error = socket_errno;
     s->write = socket_write;
     s->close = tcp_close;
 
-    s->fd = inet_nonblocking_connect(host_port, tcp_wait_for_connect, s, &local_err);
-    if (local_err != NULL) {
-        error_propagate(errp, local_err);
-        return -1;
-    }
-
-    return 0;
+    s->fd = inet_nonblocking_connect(host_port, tcp_wait_for_connect, s, errp);
 }
 
 static void tcp_accept_incoming_migration(void *opaque)
diff --git a/migration-unix.c b/migration-unix.c
index 5387c21..34a413a 100644
--- a/migration-unix.c
+++ b/migration-unix.c
@@ -68,20 +68,13 @@ static void unix_wait_for_connect(int fd, void *opaque)
     }
 }
 
-int unix_start_outgoing_migration(MigrationState *s, const char *path, Error **errp)
+void unix_start_outgoing_migration(MigrationState *s, const char *path, Error **errp)
 {
-    Error *local_err = NULL;
-
     s->get_error = unix_errno;
     s->write = unix_write;
     s->close = unix_close;
 
-    s->fd = unix_nonblocking_connect(path, unix_wait_for_connect, s, &local_err);
-    if (local_err != NULL) {
-        error_propagate(errp, local_err);
-        return -1;
-    }
-    return 0;
+    s->fd = unix_nonblocking_connect(path, unix_wait_for_connect, s, errp);
 }
 
 static void unix_accept_incoming_migration(void *opaque)
diff --git a/migration.c b/migration.c
index e631f18..ef171d2 100644
--- a/migration.c
+++ b/migration.c
@@ -487,7 +487,6 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
     MigrationState *s = migrate_get_current();
     MigrationParams params;
     const char *p;
-    int ret;
 
     params.blk = blk;
     params.shared = inc;
@@ -509,27 +508,23 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
     s = migrate_init(&params);
 
     if (strstart(uri, "tcp:", &p)) {
-        ret = tcp_start_outgoing_migration(s, p, &local_err);
+        tcp_start_outgoing_migration(s, p, &local_err);
 #if !defined(WIN32)
     } else if (strstart(uri, "exec:", &p)) {
-        ret = exec_start_outgoing_migration(s, p);
+        exec_start_outgoing_migration(s, p, &local_err);
     } else if (strstart(uri, "unix:", &p)) {
-        ret = unix_start_outgoing_migration(s, p, &local_err);
+        unix_start_outgoing_migration(s, p, &local_err);
     } else if (strstart(uri, "fd:", &p)) {
-        ret = fd_start_outgoing_migration(s, p);
+        fd_start_outgoing_migration(s, p, &local_err);
 #endif
     } else {
         error_set(errp, QERR_INVALID_PARAMETER_VALUE, "uri", "a valid migration protocol");
         return;
     }
 
-    if (ret < 0 || local_err) {
+    if (local_err) {
         migrate_fd_error(s);
-        if (!local_err) {
-            error_set_errno(errp, -ret, QERR_UNDEFINED_ERROR);
-        } else {
-            error_propagate(errp, local_err);
-        }
+        error_propagate(errp, local_err);
         return;
     }
 
diff --git a/migration.h b/migration.h
index 92e0aa9..c805e28 100644
--- a/migration.h
+++ b/migration.h
@@ -59,20 +59,19 @@ void do_info_migrate(Monitor *mon, QObject **ret_data);
 
 int exec_start_incoming_migration(const char *host_port);
 
-int exec_start_outgoing_migration(MigrationState *s, const char *host_port);
+void exec_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp);
 
 int tcp_start_incoming_migration(const char *host_port, Error **errp);
 
-int tcp_start_outgoing_migration(MigrationState *s, const char *host_port,
-                                 Error **errp);
+void tcp_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp);
 
 int unix_start_incoming_migration(const char *path, Error **errp);
 
-int unix_start_outgoing_migration(MigrationState *s, const char *path, Error **errp);
+void unix_start_outgoing_migration(MigrationState *s, const char *path, Error **errp);
 
 int fd_start_incoming_migration(const char *path);
 
-int fd_start_outgoing_migration(MigrationState *s, const char *fdname);
+void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp);
 
 void migrate_fd_error(MigrationState *s);
 
commit e08c95ce8d66276198704f21ed9df6d99b3477e0
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Oct 3 14:05:49 2012 +0200

    migration: use qemu-sockets to establish Unix sockets
    
    This makes migration-unix.c again a cut-and-paste job from migration-tcp.c,
    exactly as it was in the beginning. :)
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/migration-unix.c b/migration-unix.c
index d349662..5387c21 100644
--- a/migration-unix.c
+++ b/migration-unix.c
@@ -53,67 +53,34 @@ static int unix_close(MigrationState *s)
     return r;
 }
 
-static void unix_wait_for_connect(void *opaque)
+static void unix_wait_for_connect(int fd, void *opaque)
 {
     MigrationState *s = opaque;
-    int val, ret;
-    socklen_t valsize = sizeof(val);
 
-    DPRINTF("connect completed\n");
-    do {
-        ret = getsockopt(s->fd, SOL_SOCKET, SO_ERROR, (void *) &val, &valsize);
-    } while (ret == -1 && errno == EINTR);
-
-    if (ret < 0) {
+    if (fd < 0) {
+        DPRINTF("migrate connect error\n");
+        s->fd = -1;
         migrate_fd_error(s);
-        return;
-    }
-
-    qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
-
-    if (val == 0)
+    } else {
+        DPRINTF("migrate connect success\n");
+        s->fd = fd;
         migrate_fd_connect(s);
-    else {
-        DPRINTF("error connecting %d\n", val);
-        migrate_fd_error(s);
     }
 }
 
-int unix_start_outgoing_migration(MigrationState *s, const char *path)
+int unix_start_outgoing_migration(MigrationState *s, const char *path, Error **errp)
 {
-    struct sockaddr_un addr;
-    int ret;
+    Error *local_err = NULL;
 
-    addr.sun_family = AF_UNIX;
-    snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", path);
     s->get_error = unix_errno;
     s->write = unix_write;
     s->close = unix_close;
 
-    s->fd = qemu_socket(PF_UNIX, SOCK_STREAM, 0);
-    if (s->fd == -1) {
-        DPRINTF("Unable to open socket");
-        return -errno;
-    }
-
-    socket_set_nonblock(s->fd);
-
-    do {
-        ret = connect(s->fd, (struct sockaddr *)&addr, sizeof(addr));
-        if (ret == -1) {
-            ret = -errno;
-        }
-        if (ret == -EINPROGRESS || ret == -EWOULDBLOCK) {
-	    qemu_set_fd_handler2(s->fd, NULL, NULL, unix_wait_for_connect, s);
-            return 0;
-        }
-    } while (ret == -EINTR);
-
-    if (ret < 0) {
-        DPRINTF("connect failed\n");
-        return ret;
+    s->fd = unix_nonblocking_connect(path, unix_wait_for_connect, s, &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        return -1;
     }
-    migrate_fd_connect(s);
     return 0;
 }
 
@@ -151,43 +118,16 @@ out2:
     close(s);
 }
 
-int unix_start_incoming_migration(const char *path)
+int unix_start_incoming_migration(const char *path, Error **errp)
 {
-    struct sockaddr_un addr;
     int s;
-    int ret;
-
-    DPRINTF("Attempting to start an incoming migration\n");
-
-    s = qemu_socket(PF_UNIX, SOCK_STREAM, 0);
-    if (s == -1) {
-        fprintf(stderr, "Could not open unix socket: %s\n", strerror(errno));
-        return -errno;
-    }
-
-    memset(&addr, 0, sizeof(addr));
-    addr.sun_family = AF_UNIX;
-    snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", path);
 
-    unlink(addr.sun_path);
-    if (bind(s, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
-        ret = -errno;
-        fprintf(stderr, "bind(unix:%s): %s\n", addr.sun_path, strerror(errno));
-        goto err;
-    }
-    if (listen(s, 1) == -1) {
-        fprintf(stderr, "listen(unix:%s): %s\n", addr.sun_path,
-                strerror(errno));
-        ret = -errno;
-        goto err;
+    s = unix_listen(path, NULL, 0, errp);
+    if (s < 0) {
+        return -1;
     }
 
     qemu_set_fd_handler2(s, NULL, unix_accept_incoming_migration, NULL,
                          (void *)(intptr_t)s);
-
     return 0;
-
-err:
-    close(s);
-    return ret;
 }
diff --git a/migration.c b/migration.c
index efea219..e631f18 100644
--- a/migration.c
+++ b/migration.c
@@ -75,7 +75,7 @@ int qemu_start_incoming_migration(const char *uri, Error **errp)
     else if (strstart(uri, "exec:", &p))
         ret =  exec_start_incoming_migration(p);
     else if (strstart(uri, "unix:", &p))
-        ret = unix_start_incoming_migration(p);
+        ret = unix_start_incoming_migration(p, errp);
     else if (strstart(uri, "fd:", &p))
         ret = fd_start_incoming_migration(p);
 #endif
@@ -514,7 +514,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
     } else if (strstart(uri, "exec:", &p)) {
         ret = exec_start_outgoing_migration(s, p);
     } else if (strstart(uri, "unix:", &p)) {
-        ret = unix_start_outgoing_migration(s, p);
+        ret = unix_start_outgoing_migration(s, p, &local_err);
     } else if (strstart(uri, "fd:", &p)) {
         ret = fd_start_outgoing_migration(s, p);
 #endif
diff --git a/migration.h b/migration.h
index 1c3e9b7..92e0aa9 100644
--- a/migration.h
+++ b/migration.h
@@ -66,9 +66,9 @@ int tcp_start_incoming_migration(const char *host_port, Error **errp);
 int tcp_start_outgoing_migration(MigrationState *s, const char *host_port,
                                  Error **errp);
 
-int unix_start_incoming_migration(const char *path);
+int unix_start_incoming_migration(const char *path, Error **errp);
 
-int unix_start_outgoing_migration(MigrationState *s, const char *path);
+int unix_start_outgoing_migration(MigrationState *s, const char *path, Error **errp);
 
 int fd_start_incoming_migration(const char *path);
 
commit 342ab8d1e4f8650a22a3a1c9fade31df3fd9c1c1
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Oct 2 09:59:38 2012 +0200

    migration: centralize call to migrate_fd_error()
    
    The call to migrate_fd_error() was missing for non-socket backends, so
    centralize it in qmp_migrate().
    
    Before:
    
        (qemu) migrate fd:ffff
        migrate: An undefined error has occurred
        (qemu) info migrate
        (qemu)
    
    After:
    
        (qemu) migrate fd:ffff
        migrate: An undefined error has occurred
        (qemu) info migrate
        capabilities: xbzrle: off
        Migration status: failed
        total time: 0 milliseconds
    
    (The awful error message will be fixed later in the series).
    
    Reviewed-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/migration-tcp.c b/migration-tcp.c
index 78337a3..e8bc76a 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -79,7 +79,6 @@ int tcp_start_outgoing_migration(MigrationState *s, const char *host_port,
 
     s->fd = inet_nonblocking_connect(host_port, tcp_wait_for_connect, s, &local_err);
     if (local_err != NULL) {
-        migrate_fd_error(s);
         error_propagate(errp, local_err);
         return -1;
     }
diff --git a/migration-unix.c b/migration-unix.c
index 169de88..d349662 100644
--- a/migration-unix.c
+++ b/migration-unix.c
@@ -111,7 +111,6 @@ int unix_start_outgoing_migration(MigrationState *s, const char *path)
 
     if (ret < 0) {
         DPRINTF("connect failed\n");
-        migrate_fd_error(s);
         return ret;
     }
     migrate_fd_connect(s);
diff --git a/migration.c b/migration.c
index b332dae..efea219 100644
--- a/migration.c
+++ b/migration.c
@@ -524,6 +524,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
     }
 
     if (ret < 0 || local_err) {
+        migrate_fd_error(s);
         if (!local_err) {
             error_set_errno(errp, -ret, QERR_UNDEFINED_ERROR);
         } else {
commit be7059cd7f8998d41f0b44ec13907359d04c63d2
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Oct 3 14:34:33 2012 +0200

    migration: avoid using error_is_set and thus relying on errp != NULL
    
    The migration code is using errp to detect "internal" errors, this means
    that it relies on errp being non-NULL.
    
    No impact so far because our only QMP clients (the QMP marshaller and HMP)
    never pass a NULL Error **.  But if we had others, this patch would make
    sure that migration can work with a NULL Error **.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/migration-tcp.c b/migration-tcp.c
index a15c2b8..78337a3 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -71,14 +71,16 @@ static void tcp_wait_for_connect(int fd, void *opaque)
 int tcp_start_outgoing_migration(MigrationState *s, const char *host_port,
                                  Error **errp)
 {
+    Error *local_err = NULL;
+
     s->get_error = socket_errno;
     s->write = socket_write;
     s->close = tcp_close;
 
-    s->fd = inet_nonblocking_connect(host_port, tcp_wait_for_connect, s,
-                                     errp);
-    if (error_is_set(errp)) {
+    s->fd = inet_nonblocking_connect(host_port, tcp_wait_for_connect, s, &local_err);
+    if (local_err != NULL) {
         migrate_fd_error(s);
+        error_propagate(errp, local_err);
         return -1;
     }
 
diff --git a/migration.c b/migration.c
index 62e0304..b332dae 100644
--- a/migration.c
+++ b/migration.c
@@ -483,6 +483,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
                  bool has_inc, bool inc, bool has_detach, bool detach,
                  Error **errp)
 {
+    Error *local_err = NULL;
     MigrationState *s = migrate_get_current();
     MigrationParams params;
     const char *p;
@@ -508,7 +509,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
     s = migrate_init(&params);
 
     if (strstart(uri, "tcp:", &p)) {
-        ret = tcp_start_outgoing_migration(s, p, errp);
+        ret = tcp_start_outgoing_migration(s, p, &local_err);
 #if !defined(WIN32)
     } else if (strstart(uri, "exec:", &p)) {
         ret = exec_start_outgoing_migration(s, p);
@@ -522,11 +523,11 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
         return;
     }
 
-    if (ret < 0) {
-        if (!error_is_set(errp)) {
-            DPRINTF("migration failed: %s\n", strerror(-ret));
-            /* FIXME: we should return meaningful errors */
-            error_set(errp, QERR_UNDEFINED_ERROR);
+    if (ret < 0 || local_err) {
+        if (!local_err) {
+            error_set_errno(errp, -ret, QERR_UNDEFINED_ERROR);
+        } else {
+            error_propagate(errp, local_err);
         }
         return;
     }
commit 1fc05adfa0f79a1268f7c2b7fb324f15eb63dceb
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Wed Oct 3 13:37:46 2012 +0200

    qemu-sockets: add nonblocking connect for Unix sockets
    
    This patch mostly mimics what was done to TCP sockets, but simpler
    because there is only one address to try.  It also includes a free EINTR
    bug fix.
    
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/qemu-char.c b/qemu-char.c
index 3cc6cb5..8ebd582 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -2450,7 +2450,7 @@ static CharDriverState *qemu_chr_open_socket(QemuOpts *opts)
         if (is_listen) {
             fd = unix_listen_opts(opts, NULL);
         } else {
-            fd = unix_connect_opts(opts, NULL);
+            fd = unix_connect_opts(opts, NULL, NULL, NULL);
         }
     } else {
         if (is_listen) {
diff --git a/qemu-sockets.c b/qemu-sockets.c
index 7bf756d..d510ad1 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -252,16 +252,19 @@ static void wait_for_connect(void *opaque)
     }
 
     /* try to connect to the next address on the list */
-    while (s->current_addr->ai_next != NULL && s->fd < 0) {
-        s->current_addr = s->current_addr->ai_next;
-        s->fd = inet_connect_addr(s->current_addr, &in_progress, s);
-        /* connect in progress */
-        if (in_progress) {
-            return;
+    if (s->current_addr) {
+        while (s->current_addr->ai_next != NULL && s->fd < 0) {
+            s->current_addr = s->current_addr->ai_next;
+            s->fd = inet_connect_addr(s->current_addr, &in_progress, s);
+            /* connect in progress */
+            if (in_progress) {
+                return;
+            }
         }
+
+        freeaddrinfo(s->addr_list);
     }
 
-    freeaddrinfo(s->addr_list);
     if (s->callback) {
         s->callback(s->fd, s->opaque);
     }
@@ -701,11 +704,13 @@ err:
     return -1;
 }
 
-int unix_connect_opts(QemuOpts *opts, Error **errp)
+int unix_connect_opts(QemuOpts *opts, Error **errp,
+                      NonBlockingConnectHandler *callback, void *opaque)
 {
     struct sockaddr_un un;
     const char *path = qemu_opt_get(opts, "path");
-    int sock;
+    ConnectState *connect_state = NULL;
+    int sock, rc;
 
     if (NULL == path) {
         fprintf(stderr, "unix connect: no path specified\n");
@@ -717,16 +722,44 @@ int unix_connect_opts(QemuOpts *opts, Error **errp)
         perror("socket(unix)");
         return -1;
     }
+    if (callback != NULL) {
+        connect_state = g_malloc0(sizeof(*connect_state));
+        connect_state->callback = callback;
+        connect_state->opaque = opaque;
+        socket_set_nonblock(sock);
+    }
 
     memset(&un, 0, sizeof(un));
     un.sun_family = AF_UNIX;
     snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
-    if (connect(sock, (struct sockaddr*) &un, sizeof(un)) < 0) {
+
+    /* connect to peer */
+    do {
+        rc = 0;
+        if (connect(sock, (struct sockaddr *) &un, sizeof(un)) < 0) {
+            rc = -socket_error();
+        }
+    } while (rc == -EINTR);
+
+    if (connect_state != NULL && QEMU_SOCKET_RC_INPROGRESS(rc)) {
+        connect_state->fd = sock;
+        qemu_set_fd_handler2(sock, NULL, NULL, wait_for_connect,
+                             connect_state);
+        return sock;
+    } else if (rc >= 0) {
+        /* non blocking socket immediate success, call callback */
+        if (callback != NULL) {
+            callback(sock, opaque);
+        }
+    }
+
+    if (rc < 0) {
         fprintf(stderr, "connect(unix:%s): %s\n", path, strerror(errno));
         close(sock);
-	return -1;
+        sock = -1;
     }
 
+    g_free(connect_state);
     return sock;
 }
 
@@ -739,7 +772,8 @@ int unix_listen_opts(QemuOpts *opts, Error **errp)
     return -1;
 }
 
-int unix_connect_opts(QemuOpts *opts, Error **errp)
+int unix_connect_opts(QemuOpts *opts, Error **errp,
+                      NonBlockingConnectHandler *callback, void *opaque)
 {
     fprintf(stderr, "unix sockets are not available on windows\n");
     errno = ENOTSUP;
@@ -784,7 +818,24 @@ int unix_connect(const char *path, Error **errp)
 
     opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
     qemu_opt_set(opts, "path", path);
-    sock = unix_connect_opts(opts, errp);
+    sock = unix_connect_opts(opts, errp, NULL, NULL);
+    qemu_opts_del(opts);
+    return sock;
+}
+
+
+int unix_nonblocking_connect(const char *path,
+                             NonBlockingConnectHandler *callback,
+                             void *opaque, Error **errp)
+{
+    QemuOpts *opts;
+    int sock = -1;
+
+    g_assert(callback != NULL);
+
+    opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+    qemu_opt_set(opts, "path", path);
+    sock = unix_connect_opts(opts, errp, callback, opaque);
     qemu_opts_del(opts);
     return sock;
 }
diff --git a/qemu_socket.h b/qemu_socket.h
index ff979b5..89a5feb 100644
--- a/qemu_socket.h
+++ b/qemu_socket.h
@@ -58,8 +58,12 @@ const char *inet_strfamily(int family);
 
 int unix_listen_opts(QemuOpts *opts, Error **errp);
 int unix_listen(const char *path, char *ostr, int olen, Error **errp);
-int unix_connect_opts(QemuOpts *opts, Error **errp);
+int unix_connect_opts(QemuOpts *opts, Error **errp,
+                      NonBlockingConnectHandler *callback, void *opaque);
 int unix_connect(const char *path, Error **errp);
+int unix_nonblocking_connect(const char *str,
+                             NonBlockingConnectHandler *callback,
+                             void *opaque, Error **errp);
 
 /* Old, ipv4 only bits.  Don't use for new code. */
 int parse_host_port(struct sockaddr_in *saddr, const char *str);
commit 0c814709471ac8a9617406e1e2cbd7015e97bfe9
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Thu Oct 18 08:44:00 2012 +0200

    qemu-sockets: unix_listen and unix_connect are portable
    
    They are just wrappers and do not need a Win32-specific version.
    
    Reviewed-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/qemu-sockets.c b/qemu-sockets.c
index 7f0d4be..7bf756d 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -730,6 +730,23 @@ int unix_connect_opts(QemuOpts *opts, Error **errp)
     return sock;
 }
 
+#else
+
+int unix_listen_opts(QemuOpts *opts, Error **errp)
+{
+    fprintf(stderr, "unix sockets are not available on windows\n");
+    errno = ENOTSUP;
+    return -1;
+}
+
+int unix_connect_opts(QemuOpts *opts, Error **errp)
+{
+    fprintf(stderr, "unix sockets are not available on windows\n");
+    errno = ENOTSUP;
+    return -1;
+}
+#endif
+
 /* compatibility wrapper */
 int unix_listen(const char *str, char *ostr, int olen, Error **errp)
 {
@@ -772,38 +789,6 @@ int unix_connect(const char *path, Error **errp)
     return sock;
 }
 
-#else
-
-int unix_listen_opts(QemuOpts *opts, Error **errp)
-{
-    fprintf(stderr, "unix sockets are not available on windows\n");
-    errno = ENOTSUP;
-    return -1;
-}
-
-int unix_connect_opts(QemuOpts *opts, Error **errp)
-{
-    fprintf(stderr, "unix sockets are not available on windows\n");
-    errno = ENOTSUP;
-    return -1;
-}
-
-int unix_listen(const char *path, char *ostr, int olen, Error **errp)
-{
-    fprintf(stderr, "unix sockets are not available on windows\n");
-    errno = ENOTSUP;
-    return -1;
-}
-
-int unix_connect(const char *path, Error **errp)
-{
-    fprintf(stderr, "unix sockets are not available on windows\n");
-    errno = ENOTSUP;
-    return -1;
-}
-
-#endif
-
 #ifdef _WIN32
 static void socket_cleanup(void)
 {
commit 7fc4e63ec018a0ef6d420ddb7f6cbf68387d4995
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Oct 2 09:35:32 2012 +0200

    qemu-sockets: add Error ** to all functions
    
    This lets me adjust the clients to do proper error propagation first,
    thus avoiding temporary regressions in the quality of the error messages.
    
    Reviewed-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/nbd.c b/nbd.c
index 6f0db62..f61a288 100644
--- a/nbd.c
+++ b/nbd.c
@@ -230,12 +230,12 @@ int unix_socket_incoming(const char *path)
     char *ostr = NULL;
     int olen = 0;
 
-    return unix_listen(path, ostr, olen);
+    return unix_listen(path, ostr, olen, NULL);
 }
 
 int unix_socket_outgoing(const char *path)
 {
-    return unix_connect(path);
+    return unix_connect(path, NULL);
 }
 
 /* Basic flow for negotiation
diff --git a/qemu-char.c b/qemu-char.c
index b082bae..3cc6cb5 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -2102,7 +2102,7 @@ static CharDriverState *qemu_chr_open_udp(QemuOpts *opts)
     chr = g_malloc0(sizeof(CharDriverState));
     s = g_malloc0(sizeof(NetCharDriver));
 
-    fd = inet_dgram_opts(opts);
+    fd = inet_dgram_opts(opts, NULL);
     if (fd < 0) {
         fprintf(stderr, "inet_dgram_opts failed\n");
         goto return_err;
@@ -2448,9 +2448,9 @@ static CharDriverState *qemu_chr_open_socket(QemuOpts *opts)
 
     if (is_unix) {
         if (is_listen) {
-            fd = unix_listen_opts(opts);
+            fd = unix_listen_opts(opts, NULL);
         } else {
-            fd = unix_connect_opts(opts);
+            fd = unix_connect_opts(opts, NULL);
         }
     } else {
         if (is_listen) {
diff --git a/qemu-sockets.c b/qemu-sockets.c
index 2b1ed2f..7f0d4be 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -403,7 +403,7 @@ int inet_connect_opts(QemuOpts *opts, Error **errp,
     return sock;
 }
 
-int inet_dgram_opts(QemuOpts *opts)
+int inet_dgram_opts(QemuOpts *opts, Error **errp)
 {
     struct addrinfo ai, *peer = NULL, *local = NULL;
     const char *addr;
@@ -653,7 +653,7 @@ int inet_nonblocking_connect(const char *str,
 
 #ifndef _WIN32
 
-int unix_listen_opts(QemuOpts *opts)
+int unix_listen_opts(QemuOpts *opts, Error **errp)
 {
     struct sockaddr_un un;
     const char *path = qemu_opt_get(opts, "path");
@@ -701,7 +701,7 @@ err:
     return -1;
 }
 
-int unix_connect_opts(QemuOpts *opts)
+int unix_connect_opts(QemuOpts *opts, Error **errp)
 {
     struct sockaddr_un un;
     const char *path = qemu_opt_get(opts, "path");
@@ -731,7 +731,7 @@ int unix_connect_opts(QemuOpts *opts)
 }
 
 /* compatibility wrapper */
-int unix_listen(const char *str, char *ostr, int olen)
+int unix_listen(const char *str, char *ostr, int olen, Error **errp)
 {
     QemuOpts *opts;
     char *path, *optstr;
@@ -752,7 +752,7 @@ int unix_listen(const char *str, char *ostr, int olen)
         qemu_opt_set(opts, "path", str);
     }
 
-    sock = unix_listen_opts(opts);
+    sock = unix_listen_opts(opts, errp);
 
     if (sock != -1 && ostr)
         snprintf(ostr, olen, "%s%s", qemu_opt_get(opts, "path"), optstr ? optstr : "");
@@ -760,42 +760,42 @@ int unix_listen(const char *str, char *ostr, int olen)
     return sock;
 }
 
-int unix_connect(const char *path)
+int unix_connect(const char *path, Error **errp)
 {
     QemuOpts *opts;
     int sock;
 
     opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
     qemu_opt_set(opts, "path", path);
-    sock = unix_connect_opts(opts);
+    sock = unix_connect_opts(opts, errp);
     qemu_opts_del(opts);
     return sock;
 }
 
 #else
 
-int unix_listen_opts(QemuOpts *opts)
+int unix_listen_opts(QemuOpts *opts, Error **errp)
 {
     fprintf(stderr, "unix sockets are not available on windows\n");
     errno = ENOTSUP;
     return -1;
 }
 
-int unix_connect_opts(QemuOpts *opts)
+int unix_connect_opts(QemuOpts *opts, Error **errp)
 {
     fprintf(stderr, "unix sockets are not available on windows\n");
     errno = ENOTSUP;
     return -1;
 }
 
-int unix_listen(const char *path, char *ostr, int olen)
+int unix_listen(const char *path, char *ostr, int olen, Error **errp)
 {
     fprintf(stderr, "unix sockets are not available on windows\n");
     errno = ENOTSUP;
     return -1;
 }
 
-int unix_connect(const char *path)
+int unix_connect(const char *path, Error **errp)
 {
     fprintf(stderr, "unix sockets are not available on windows\n");
     errno = ENOTSUP;
diff --git a/qemu_socket.h b/qemu_socket.h
index 3e8aee9..ff979b5 100644
--- a/qemu_socket.h
+++ b/qemu_socket.h
@@ -53,13 +53,13 @@ int inet_nonblocking_connect(const char *str,
                              NonBlockingConnectHandler *callback,
                              void *opaque, Error **errp);
 
-int inet_dgram_opts(QemuOpts *opts);
+int inet_dgram_opts(QemuOpts *opts, Error **errp);
 const char *inet_strfamily(int family);
 
-int unix_listen_opts(QemuOpts *opts);
-int unix_listen(const char *path, char *ostr, int olen);
-int unix_connect_opts(QemuOpts *opts);
-int unix_connect(const char *path);
+int unix_listen_opts(QemuOpts *opts, Error **errp);
+int unix_listen(const char *path, char *ostr, int olen, Error **errp);
+int unix_connect_opts(QemuOpts *opts, Error **errp);
+int unix_connect(const char *path, Error **errp);
 
 /* Old, ipv4 only bits.  Don't use for new code. */
 int parse_host_port(struct sockaddr_in *saddr, const char *str);
diff --git a/qga/channel-posix.c b/qga/channel-posix.c
index 57eea06..e22eee6 100644
--- a/qga/channel-posix.c
+++ b/qga/channel-posix.c
@@ -181,7 +181,7 @@ static gboolean ga_channel_open(GAChannel *c, const gchar *path, GAChannelMethod
         break;
     }
     case GA_CHANNEL_UNIX_LISTEN: {
-        int fd = unix_listen(path, NULL, strlen(path));
+        int fd = unix_listen(path, NULL, strlen(path), NULL);
         if (fd == -1) {
             g_critical("error opening path: %s", strerror(errno));
             return false;
diff --git a/ui/vnc.c b/ui/vnc.c
index 66ae930..ceade57 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -3065,7 +3065,7 @@ int vnc_display_open(DisplayState *ds, const char *display)
     if (reverse) {
         /* connect to viewer */
         if (strncmp(display, "unix:", 5) == 0)
-            vs->lsock = unix_connect(display+5);
+            vs->lsock = unix_connect(display+5, NULL);
         else
             vs->lsock = inet_connect(display, NULL);
         if (-1 == vs->lsock) {
@@ -3085,7 +3085,7 @@ int vnc_display_open(DisplayState *ds, const char *display)
         dpy = g_malloc(256);
         if (strncmp(display, "unix:", 5) == 0) {
             pstrcpy(dpy, 256, "unix:");
-            vs->lsock = unix_listen(display+5, dpy+5, 256-5);
+            vs->lsock = unix_listen(display+5, dpy+5, 256-5, NULL);
         } else {
             vs->lsock = inet_listen(display, dpy, 256,
                                     SOCK_STREAM, 5900, NULL);
commit 680d16dcb79f999fad3a652c5190d6a5c6ea10dd
Author: Paolo Bonzini <pbonzini at redhat.com>
Date:   Tue Oct 2 09:00:45 2012 +0200

    error: add error_set_errno and error_setg_errno
    
    These functions help maintaining homogeneous formatting of error
    messages that include strerror values.
    
    Acked-by: Luiz Capitulino <lcapitulino at redhat.com>
    Signed-off-by: Paolo Bonzini <pbonzini at redhat.com>

diff --git a/error.c b/error.c
index 1f05fc4..128d88c 100644
--- a/error.c
+++ b/error.c
@@ -43,6 +43,34 @@ void error_set(Error **errp, ErrorClass err_class, const char *fmt, ...)
     *errp = err;
 }
 
+void error_set_errno(Error **errp, int os_errno, ErrorClass err_class,
+                     const char *fmt, ...)
+{
+    Error *err;
+    char *msg1;
+    va_list ap;
+
+    if (errp == NULL) {
+        return;
+    }
+    assert(*errp == NULL);
+
+    err = g_malloc0(sizeof(*err));
+
+    va_start(ap, fmt);
+    msg1 = g_strdup_vprintf(fmt, ap);
+    if (os_errno != 0) {
+        err->msg = g_strdup_printf("%s: %s", msg1, strerror(os_errno));
+        g_free(msg1);
+    } else {
+        err->msg = msg1;
+    }
+    va_end(ap);
+    err->err_class = err_class;
+
+    *errp = err;
+}
+
 Error *error_copy(const Error *err)
 {
     Error *err_new;
diff --git a/error.h b/error.h
index da7fed3..4d52e73 100644
--- a/error.h
+++ b/error.h
@@ -30,10 +30,19 @@ typedef struct Error Error;
 void error_set(Error **err, ErrorClass err_class, const char *fmt, ...) GCC_FMT_ATTR(3, 4);
 
 /**
+ * Set an indirect pointer to an error given a ErrorClass value and a
+ * printf-style human message, followed by a strerror() string if
+ * @os_error is not zero.
+ */
+void error_set_errno(Error **err, int os_error, ErrorClass err_class, const char *fmt, ...) GCC_FMT_ATTR(4, 5);
+
+/**
  * Same as error_set(), but sets a generic error
  */
 #define error_setg(err, fmt, ...) \
     error_set(err, ERROR_CLASS_GENERIC_ERROR, fmt, ## __VA_ARGS__)
+#define error_setg_errno(err, os_error, fmt, ...) \
+    error_set_errno(err, os_error, ERROR_CLASS_GENERIC_ERROR, fmt, ## __VA_ARGS__)
 
 /**
  * Returns true if an indirect pointer to an error is pointing to a valid